diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f3058ae --- /dev/null +++ b/.gitignore @@ -0,0 +1,33 @@ +# 依赖目录 +node_modules/ + +# 日志文件 +npm-debug.log* +yarn-debug.log* +yarn-error.log* +pnpm-debug.log* + +# 环境变量文件 +.env +.env.local +.env.*.local + +# 编辑器目录和文件 +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# 操作系统文件 +.DS_Store +Thumbs.db + +# 构建输出 +dist/ +build/ +*.log + +# 临时文件 +*.tmp +.cache/ diff --git a/README.md b/README.md index 0204836..079faa2 100644 --- a/README.md +++ b/README.md @@ -1,17 +1,17 @@ -# 南京公共工程建设中心 - 公告抓取工具 +# 南京公共工程建设中心 - 公告采集工具 -一个用于抓取南京公共工程建设中心公告信息的 Web 可视化工具。 +一个用于采集南京公共工程建设中心公告信息的 Web 可视化工具。 ## 功能特性 -- ✅ 抓取公告列表(支持分页) -- ✅ 按时间范围智能抓取 -- ✅ 抓取公告详情内容 +- ✅ 采集公告列表(支持分页) +- ✅ 按时间范围智能采集 +- ✅ 采集公告详情内容 - ✅ 智能提取预算金额 - ✅ 生成统计报告 -- ✅ Web可视化界面 -- ✅ 导出Word/Markdown报告 -- ✅ RESTful API支持 +- ✅ Web 可视化界面 +- ✅ 导出 Word/Markdown 报告 +- ✅ RESTful API 支持 ## 安装 @@ -34,21 +34,24 @@ npm start ### 3. 功能介绍 **公告列表标签** + - 快速查看所有公告 - 支持分页浏览 - 一键获取最新公告列表 -**详情抓取标签** -- 批量抓取公告详情 -- 支持按时间范围抓取 +**详情采集标签** + +- 批量采集公告详情 +- 支持按时间范围采集 - 自动提取预算金额 -- 可自定义抓取数量 +- 可自定义采集数量 **生成报告标签** + - 支持按时间范围生成报告 - 设置金额阈值筛选项目 - 实时统计项目信息 -- 一键导出Word/Markdown报告 +- 一键导出 Word/Markdown 报告 ## 报告示例 @@ -60,8 +63,8 @@ npm start ## 统计摘要 - 总项目数: 10 -- 超过50万元的项目: 3 -- 总金额: 5395.50万元 +- 超过 50 万元的项目: 3 +- 总金额: 5395.50 万元 ## 项目列表 @@ -69,7 +72,7 @@ npm start - **发布日期**: 2025-12-12 - **发布时间**: 2025-12-12 10:35:00 -- **预算金额**: 5000万元 +- **预算金额**: 5000 万元 - **链接**: https://... ``` @@ -78,14 +81,18 @@ npm start 服务器启动后提供以下 RESTful API 接口: ### 1. 获取公告列表 + ``` GET /api/list?url=<列表页URL>&page=<页码> ``` + 参数: -- `url` (可选): 列表页URL,默认为官网首页 -- `page` (可选): 页码,默认为1 + +- `url` (可选): 列表页 URL,默认为官网首页 +- `page` (可选): 页码,默认为 1 ### 2. 按时间范围获取列表 + ``` POST /api/list-daterange Content-Type: application/json @@ -98,6 +105,7 @@ Content-Type: application/json ``` ### 3. 批量获取详情 + ``` POST /api/details Content-Type: application/json @@ -109,6 +117,7 @@ Content-Type: application/json ``` ### 4. 生成报告 + ``` POST /api/report Content-Type: application/json @@ -121,6 +130,7 @@ Content-Type: application/json ``` ### 5. 按时间范围生成报告 + ``` POST /api/report-daterange Content-Type: application/json @@ -137,8 +147,8 @@ Content-Type: application/json - **后端**: Node.js + Express - **爬虫**: Axios + Cheerio -- **前端**: 原生HTML/CSS/JavaScript -- **编码处理**: iconv-lite (支持GBK/UTF-8) +- **前端**: 原生 HTML/CSS/JavaScript +- **编码处理**: iconv-lite (支持 GBK/UTF-8) - **文档导出**: docx.js ## 项目结构 @@ -156,61 +166,69 @@ Content-Type: application/json ## 注意事项 -1. 抓取速度已限制为每条延迟500ms-1s,避免请求过快 +1. 采集速度已限制为每条延迟 500ms-1s,避免请求过快 2. 仅支持 gjzx.nanjing.gov.cn 域名的详情页解析 3. 金额提取基于正则匹配,支持多种格式(预算金额、最高限价等) -4. Web服务器默认端口3000,可在 server.js 中修改 -5. 按时间范围抓取会在检测到所有公告早于起始日期时自动停止 -6. 编码自动识别,支持GBK和UTF-8网页 +4. Web 服务器默认端口 3000,可在 server.js 中修改 +5. 按时间范围采集会在检测到所有公告早于起始日期时自动停止 +6. 编码自动识别,支持 GBK 和 UTF-8 网页 ## 核心功能说明 -### 时间范围抓取逻辑 +### 时间范围采集逻辑 -按时间范围抓取时,程序会: -1. 从第一页开始顺序抓取 +按时间范围采集时,程序会: + +1. 从第一页开始顺序采集 2. 检查每页公告的日期是否在指定范围内 -3. 如果某页所有公告都早于起始日期,自动停止抓取 -4. 支持设置最大页数限制,避免过度抓取 +3. 如果某页所有公告都早于起始日期,自动停止采集 +4. 支持设置最大页数限制,避免过度采集 ### 金额提取规则 支持识别以下格式: -- 预算金额: XX万元 -- 最高限价: XX万元 -- 预算: XX万元 -- 金额: XX万元 -- 直接数字: XX万元 + +- 预算金额: XX 万元 +- 最高限价: XX 万元 +- 预算: XX 万元 +- 金额: XX 万元 +- 直接数字: XX 万元 ### 编码处理 自动识别网页编码: + - 优先读取 Content-Type 中的 charset - 自动处理 GBK、GB2312 编码 - 默认使用 UTF-8 ## 常见问题 -### Q: 为什么抓取速度比较慢? -A: 为了避免对服务器造成过大压力,程序限制了请求频率(每条延迟500ms-1s)。这是一个负责任的爬虫设计。 +### Q: 为什么采集速度比较慢? -### Q: 如何抓取指定日期范围的公告? -A: 在Web界面的"详情抓取"和"生成报告"标签中勾选"按时间范围抓取",然后输入起始和结束日期即可。 +A: 为了避免对服务器造成过大压力,程序限制了请求频率(每条延迟 500ms-1s)。这是一个负责任的爬虫设计。 + +### Q: 如何采集指定日期范围的公告? + +A: 在 Web 界面的"详情采集"和"生成报告"标签中勾选"按时间范围采集",然后输入起始和结束日期即可。 ### Q: 导出的报告在哪里? -A: 点击"导出Word"或"导出Markdown"按钮后会自动下载到浏览器的默认下载目录。 -### Q: 可以抓取其他网站吗? -A: 需要修改 server.js 中的 BASE_URL 和相应的解析函数,因为不同网站的HTML结构不同。 +A: 点击"导出 Word"或"导出 Markdown"按钮后会自动下载到浏览器的默认下载目录。 + +### Q: 可以采集其他网站吗? + +A: 需要修改 server.js 中的 BASE_URL 和相应的解析函数,因为不同网站的 HTML 结构不同。 ## 更新日志 ### v1.0.0 (2025-12-12) -- Web可视化界面 -- 支持按时间范围抓取 + +- Web 可视化界面 +- 支持按时间范围采集 - 支持分页浏览 -- 支持导出Word/Markdown报告 -- RESTful API接口 +- 支持导出 Word/Markdown 报告 +- RESTful API 接口 - 自动编码识别 - 智能金额提取 diff --git a/node_modules/.package-lock.json b/node_modules/.package-lock.json index 88c0cb4..96859aa 100644 --- a/node_modules/.package-lock.json +++ b/node_modules/.package-lock.json @@ -4,6 +4,46 @@ "lockfileVersion": 3, "requires": true, "packages": { + "node_modules/@napi-rs/canvas": { + "version": "0.1.80", + "resolved": "https://registry.npmmirror.com/@napi-rs/canvas/-/canvas-0.1.80.tgz", + "integrity": "sha512-DxuT1ClnIPts1kQx8FBmkk4BQDTfI5kIzywAaMjQSXfNnra5UFU9PwurXrl+Je3bJ6BGsp/zmshVVFbCmyI+ww==", + "license": "MIT", + "workspaces": [ + "e2e/*" + ], + "engines": { + "node": ">= 10" + }, + "optionalDependencies": { + "@napi-rs/canvas-android-arm64": "0.1.80", + "@napi-rs/canvas-darwin-arm64": "0.1.80", + "@napi-rs/canvas-darwin-x64": "0.1.80", + "@napi-rs/canvas-linux-arm-gnueabihf": "0.1.80", + "@napi-rs/canvas-linux-arm64-gnu": "0.1.80", + "@napi-rs/canvas-linux-arm64-musl": "0.1.80", + "@napi-rs/canvas-linux-riscv64-gnu": "0.1.80", + "@napi-rs/canvas-linux-x64-gnu": "0.1.80", + "@napi-rs/canvas-linux-x64-musl": "0.1.80", + "@napi-rs/canvas-win32-x64-msvc": "0.1.80" + } + }, + "node_modules/@napi-rs/canvas-win32-x64-msvc": { + "version": "0.1.80", + "resolved": "https://registry.npmmirror.com/@napi-rs/canvas-win32-x64-msvc/-/canvas-win32-x64-msvc-0.1.80.tgz", + "integrity": "sha512-Z8jPsM6df5V8B1HrCHB05+bDiCxjE9QA//3YrkKIdVDEwn5RKaqOxCJDRJkl48cJbylcrJbW4HxZbTte8juuPg==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 10" + } + }, "node_modules/@types/node": { "version": "24.10.3", "resolved": "https://registry.npmmirror.com/@types/node/-/node-24.10.3.tgz", @@ -971,6 +1011,15 @@ "node": ">= 0.6" } }, + "node_modules/nodemailer": { + "version": "7.0.11", + "resolved": "https://registry.npmmirror.com/nodemailer/-/nodemailer-7.0.11.tgz", + "integrity": "sha512-gnXhNRE0FNhD7wPSCGhdNh46Hs6nm+uTyg+Kq0cZukNQiYdnCsoQjodNP9BQVG9XrcK/v6/MgpAPBUFyzh9pvw==", + "license": "MIT-0", + "engines": { + "node": ">=6.0.0" + } + }, "node_modules/nth-check": { "version": "2.1.1", "resolved": "https://registry.npmmirror.com/nth-check/-/nth-check-2.1.1.tgz", @@ -1099,6 +1148,38 @@ "url": "https://opencollective.com/express" } }, + "node_modules/pdf-parse": { + "version": "2.4.5", + "resolved": "https://registry.npmmirror.com/pdf-parse/-/pdf-parse-2.4.5.tgz", + "integrity": "sha512-mHU89HGh7v+4u2ubfnevJ03lmPgQ5WU4CxAVmTSh/sxVTEDYd1er/dKS/A6vg77NX47KTEoihq8jZBLr8Cxuwg==", + "license": "Apache-2.0", + "dependencies": { + "@napi-rs/canvas": "0.1.80", + "pdfjs-dist": "5.4.296" + }, + "bin": { + "pdf-parse": "bin/cli.mjs" + }, + "engines": { + "node": ">=20.16.0 <21 || >=22.3.0" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/mehmet-kozan" + } + }, + "node_modules/pdfjs-dist": { + "version": "5.4.296", + "resolved": "https://registry.npmmirror.com/pdfjs-dist/-/pdfjs-dist-5.4.296.tgz", + "integrity": "sha512-DlOzet0HO7OEnmUmB6wWGJrrdvbyJKftI1bhMitK7O2N8W2gc757yyYBbINy9IDafXAV9wmKr9t7xsTaNKRG5Q==", + "license": "Apache-2.0", + "engines": { + "node": ">=20.16.0 || >=22.3.0" + }, + "optionalDependencies": { + "@napi-rs/canvas": "^0.1.80" + } + }, "node_modules/process-nextick-args": { "version": "2.0.1", "resolved": "https://registry.npmmirror.com/process-nextick-args/-/process-nextick-args-2.0.1.tgz", diff --git a/package-lock.json b/package-lock.json index 87c19fa..a5988dd 100644 --- a/package-lock.json +++ b/package-lock.json @@ -13,7 +13,193 @@ "cors": "^2.8.5", "docx": "^9.5.1", "express": "^5.2.1", - "iconv-lite": "^0.6.3" + "iconv-lite": "^0.6.3", + "nodemailer": "^7.0.11", + "pdf-parse": "^2.4.5" + } + }, + "node_modules/@napi-rs/canvas": { + "version": "0.1.80", + "resolved": "https://registry.npmmirror.com/@napi-rs/canvas/-/canvas-0.1.80.tgz", + "integrity": "sha512-DxuT1ClnIPts1kQx8FBmkk4BQDTfI5kIzywAaMjQSXfNnra5UFU9PwurXrl+Je3bJ6BGsp/zmshVVFbCmyI+ww==", + "license": "MIT", + "workspaces": [ + "e2e/*" + ], + "engines": { + "node": ">= 10" + }, + "optionalDependencies": { + "@napi-rs/canvas-android-arm64": "0.1.80", + "@napi-rs/canvas-darwin-arm64": "0.1.80", + "@napi-rs/canvas-darwin-x64": "0.1.80", + "@napi-rs/canvas-linux-arm-gnueabihf": "0.1.80", + "@napi-rs/canvas-linux-arm64-gnu": "0.1.80", + "@napi-rs/canvas-linux-arm64-musl": "0.1.80", + "@napi-rs/canvas-linux-riscv64-gnu": "0.1.80", + "@napi-rs/canvas-linux-x64-gnu": "0.1.80", + "@napi-rs/canvas-linux-x64-musl": "0.1.80", + "@napi-rs/canvas-win32-x64-msvc": "0.1.80" + } + }, + "node_modules/@napi-rs/canvas-android-arm64": { + "version": "0.1.80", + "resolved": "https://registry.npmmirror.com/@napi-rs/canvas-android-arm64/-/canvas-android-arm64-0.1.80.tgz", + "integrity": "sha512-sk7xhN/MoXeuExlggf91pNziBxLPVUqF2CAVnB57KLG/pz7+U5TKG8eXdc3pm0d7Od0WreB6ZKLj37sX9muGOQ==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-darwin-arm64": { + "version": "0.1.80", + "resolved": "https://registry.npmmirror.com/@napi-rs/canvas-darwin-arm64/-/canvas-darwin-arm64-0.1.80.tgz", + "integrity": "sha512-O64APRTXRUiAz0P8gErkfEr3lipLJgM6pjATwavZ22ebhjYl/SUbpgM0xcWPQBNMP1n29afAC/Us5PX1vg+JNQ==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-darwin-x64": { + "version": "0.1.80", + "resolved": "https://registry.npmmirror.com/@napi-rs/canvas-darwin-x64/-/canvas-darwin-x64-0.1.80.tgz", + "integrity": "sha512-FqqSU7qFce0Cp3pwnTjVkKjjOtxMqRe6lmINxpIZYaZNnVI0H5FtsaraZJ36SiTHNjZlUB69/HhxNDT1Aaa9vA==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-linux-arm-gnueabihf": { + "version": "0.1.80", + "resolved": "https://registry.npmmirror.com/@napi-rs/canvas-linux-arm-gnueabihf/-/canvas-linux-arm-gnueabihf-0.1.80.tgz", + "integrity": "sha512-eyWz0ddBDQc7/JbAtY4OtZ5SpK8tR4JsCYEZjCE3dI8pqoWUC8oMwYSBGCYfsx2w47cQgQCgMVRVTFiiO38hHQ==", + "cpu": [ + "arm" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-linux-arm64-gnu": { + "version": "0.1.80", + "resolved": "https://registry.npmmirror.com/@napi-rs/canvas-linux-arm64-gnu/-/canvas-linux-arm64-gnu-0.1.80.tgz", + "integrity": "sha512-qwA63t8A86bnxhuA/GwOkK3jvb+XTQaTiVML0vAWoHyoZYTjNs7BzoOONDgTnNtr8/yHrq64XXzUoLqDzU+Uuw==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-linux-arm64-musl": { + "version": "0.1.80", + "resolved": "https://registry.npmmirror.com/@napi-rs/canvas-linux-arm64-musl/-/canvas-linux-arm64-musl-0.1.80.tgz", + "integrity": "sha512-1XbCOz/ymhj24lFaIXtWnwv/6eFHXDrjP0jYkc6iHQ9q8oXKzUX1Lc6bu+wuGiLhGh2GS/2JlfORC5ZcXimRcg==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-linux-riscv64-gnu": { + "version": "0.1.80", + "resolved": "https://registry.npmmirror.com/@napi-rs/canvas-linux-riscv64-gnu/-/canvas-linux-riscv64-gnu-0.1.80.tgz", + "integrity": "sha512-XTzR125w5ZMs0lJcxRlS1K3P5RaZ9RmUsPtd1uGt+EfDyYMu4c6SEROYsxyatbbu/2+lPe7MPHOO/0a0x7L/gw==", + "cpu": [ + "riscv64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-linux-x64-gnu": { + "version": "0.1.80", + "resolved": "https://registry.npmmirror.com/@napi-rs/canvas-linux-x64-gnu/-/canvas-linux-x64-gnu-0.1.80.tgz", + "integrity": "sha512-BeXAmhKg1kX3UCrJsYbdQd3hIMDH/K6HnP/pG2LuITaXhXBiNdh//TVVVVCBbJzVQaV5gK/4ZOCMrQW9mvuTqA==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-linux-x64-musl": { + "version": "0.1.80", + "resolved": "https://registry.npmmirror.com/@napi-rs/canvas-linux-x64-musl/-/canvas-linux-x64-musl-0.1.80.tgz", + "integrity": "sha512-x0XvZWdHbkgdgucJsRxprX/4o4sEed7qo9rCQA9ugiS9qE2QvP0RIiEugtZhfLH3cyI+jIRFJHV4Fuz+1BHHMg==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-win32-x64-msvc": { + "version": "0.1.80", + "resolved": "https://registry.npmmirror.com/@napi-rs/canvas-win32-x64-msvc/-/canvas-win32-x64-msvc-0.1.80.tgz", + "integrity": "sha512-Z8jPsM6df5V8B1HrCHB05+bDiCxjE9QA//3YrkKIdVDEwn5RKaqOxCJDRJkl48cJbylcrJbW4HxZbTte8juuPg==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 10" } }, "node_modules/@types/node": { @@ -983,6 +1169,15 @@ "node": ">= 0.6" } }, + "node_modules/nodemailer": { + "version": "7.0.11", + "resolved": "https://registry.npmmirror.com/nodemailer/-/nodemailer-7.0.11.tgz", + "integrity": "sha512-gnXhNRE0FNhD7wPSCGhdNh46Hs6nm+uTyg+Kq0cZukNQiYdnCsoQjodNP9BQVG9XrcK/v6/MgpAPBUFyzh9pvw==", + "license": "MIT-0", + "engines": { + "node": ">=6.0.0" + } + }, "node_modules/nth-check": { "version": "2.1.1", "resolved": "https://registry.npmmirror.com/nth-check/-/nth-check-2.1.1.tgz", @@ -1111,6 +1306,38 @@ "url": "https://opencollective.com/express" } }, + "node_modules/pdf-parse": { + "version": "2.4.5", + "resolved": "https://registry.npmmirror.com/pdf-parse/-/pdf-parse-2.4.5.tgz", + "integrity": "sha512-mHU89HGh7v+4u2ubfnevJ03lmPgQ5WU4CxAVmTSh/sxVTEDYd1er/dKS/A6vg77NX47KTEoihq8jZBLr8Cxuwg==", + "license": "Apache-2.0", + "dependencies": { + "@napi-rs/canvas": "0.1.80", + "pdfjs-dist": "5.4.296" + }, + "bin": { + "pdf-parse": "bin/cli.mjs" + }, + "engines": { + "node": ">=20.16.0 <21 || >=22.3.0" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/mehmet-kozan" + } + }, + "node_modules/pdfjs-dist": { + "version": "5.4.296", + "resolved": "https://registry.npmmirror.com/pdfjs-dist/-/pdfjs-dist-5.4.296.tgz", + "integrity": "sha512-DlOzet0HO7OEnmUmB6wWGJrrdvbyJKftI1bhMitK7O2N8W2gc757yyYBbINy9IDafXAV9wmKr9t7xsTaNKRG5Q==", + "license": "Apache-2.0", + "engines": { + "node": ">=20.16.0 || >=22.3.0" + }, + "optionalDependencies": { + "@napi-rs/canvas": "^0.1.80" + } + }, "node_modules/process-nextick-args": { "version": "2.0.1", "resolved": "https://registry.npmmirror.com/process-nextick-args/-/process-nextick-args-2.0.1.tgz", diff --git a/package.json b/package.json index 8818081..3a4a6a8 100644 --- a/package.json +++ b/package.json @@ -2,7 +2,7 @@ "name": "gjzx-scraper", "version": "1.0.0", "type": "module", - "description": "工具:抓取 https://gjzx.nanjing.gov.cn/gggs/ 公示列表信息及详情", + "description": "工具:采集 https://gjzx.nanjing.gov.cn/gggs/ 公示列表信息及详情", "main": "src/server.js", "scripts": { "start": "node src/server.js" @@ -13,6 +13,7 @@ "cors": "^2.8.5", "docx": "^9.5.1", "express": "^5.2.1", - "iconv-lite": "^0.6.3" + "iconv-lite": "^0.6.3", + "nodemailer": "^7.0.11" } } diff --git a/public/app.js b/public/app.js index 9294624..1d3733a 100644 --- a/public/app.js +++ b/public/app.js @@ -134,11 +134,11 @@ async function fetchDetails() { listData = await dateRangeResponse.json(); } else { - // 普通模式 - 按数量抓取多页 + // 普通模式 - 按数量采集多页 const url = document.getElementById('detailUrl').value; const limit = parseInt(document.getElementById('detailLimit').value); - // 抓取多页直到获得足够数量 + // 采集多页直到获得足够数量 const allItems = []; let page = 1; const maxPagesToFetch = Math.ceil(limit / 10) + 1; // 假设每页约10条 @@ -177,7 +177,7 @@ async function fetchDetails() { return; } - // 抓取详情 + // 采集详情 const limit = useDetailDateRange ? listData.data.length : parseInt(document.getElementById('detailLimit').value); const detailResponse = await fetch(`${API_BASE}/details`, { method: 'POST', @@ -202,7 +202,7 @@ async function fetchDetails() { function displayDetails(items, container) { const html = `
-

抓取了 ${items.length} 条详情

+

采集了 ${items.length} 条详情

${items.map((item, index) => `

${index + 1}. ${item.title}

@@ -212,7 +212,7 @@ function displayDetails(items, container) { ${item.detail.budget ? ` ${item.detail.budget.amount}${item.detail.budget.unit} ` : '
未找到预算信息
'} - ` : '
抓取失败
'} + ` : '
采集失败
'}
查看原文 →
`).join('')} @@ -271,6 +271,7 @@ async function generateReport() { currentReport = data.data; displayReport(data.data, results); exportBtn.style.display = 'inline-block'; + document.getElementById('sendEmailBtn').style.display = 'inline-block'; } else { results.innerHTML = `
错误: ${data.error}
`; } @@ -475,3 +476,197 @@ async function exportReport() { document.body.removeChild(a); URL.revokeObjectURL(url); } + +// ========== 邮件功能 ========== + +// 页面加载时加载邮件配置 +document.addEventListener('DOMContentLoaded', function() { + loadEmailConfig(); +}); + +// 保存邮件配置到localStorage +function saveEmailConfig() { + const config = { + smtpHost: document.getElementById('smtpHost').value, + smtpPort: parseInt(document.getElementById('smtpPort').value) || 587, + smtpUser: document.getElementById('smtpUser').value, + smtpPass: document.getElementById('smtpPass').value, + recipients: document.getElementById('recipients').value + }; + + // 验证配置 + if (!config.smtpHost || !config.smtpUser || !config.smtpPass || !config.recipients) { + showEmailStatus('请填写所有必填项', 'error'); + return; + } + + // 保存到localStorage + localStorage.setItem('emailConfig', JSON.stringify(config)); + showEmailStatus('邮件配置已保存', 'success'); +} + +// 从localStorage加载邮件配置 +function loadEmailConfig() { + const configStr = localStorage.getItem('emailConfig'); + if (configStr) { + try { + const config = JSON.parse(configStr); + document.getElementById('smtpHost').value = config.smtpHost || ''; + document.getElementById('smtpPort').value = config.smtpPort || 587; + document.getElementById('smtpUser').value = config.smtpUser || ''; + document.getElementById('smtpPass').value = config.smtpPass || ''; + document.getElementById('recipients').value = config.recipients || ''; + } catch (e) { + console.error('加载邮件配置失败:', e); + } + } +} + +// 测试邮件配置 +async function testEmailConfig() { + const config = { + smtpHost: document.getElementById('smtpHost').value, + smtpPort: parseInt(document.getElementById('smtpPort').value) || 587, + smtpUser: document.getElementById('smtpUser').value, + smtpPass: document.getElementById('smtpPass').value, + recipients: document.getElementById('recipients').value + }; + + // 验证配置 + if (!config.smtpHost || !config.smtpUser || !config.smtpPass || !config.recipients) { + showEmailStatus('请填写所有必填项', 'error'); + return; + } + + // 创建测试报告 + const testReport = { + summary: { + total_count: 1, + filtered_count: 1, + threshold: '50万元', + total_amount: '100.00万元', + generated_at: new Date().toISOString() + }, + projects: [{ + title: '这是一封测试邮件', + date: new Date().toLocaleDateString('zh-CN'), + publish_time: new Date().toLocaleString('zh-CN'), + budget: { + amount: 100, + unit: '万元', + text: '测试金额', + originalUnit: '万元' + }, + url: 'https://gjzx.nanjing.gov.cn' + }] + }; + + showEmailStatus('正在发送测试邮件...', 'info'); + + try { + const response = await fetch(`${API_BASE}/send-email`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + emailConfig: config, + report: testReport + }) + }); + + const data = await response.json(); + + if (data.success) { + showEmailStatus('测试邮件发送成功!请检查收件箱', 'success'); + } else { + showEmailStatus(`发送失败: ${data.error}`, 'error'); + } + } catch (error) { + showEmailStatus(`请求失败: ${error.message}`, 'error'); + } +} + +// 发送报告到邮箱 +async function sendReportByEmail() { + if (!currentReport) { + alert('请先生成报告'); + return; + } + + // 从localStorage加载邮件配置 + const configStr = localStorage.getItem('emailConfig'); + if (!configStr) { + alert('请先在"邮件配置"标签页配置邮件服务器'); + return; + } + + let emailConfig; + try { + emailConfig = JSON.parse(configStr); + } catch (e) { + alert('邮件配置格式错误,请重新配置'); + return; + } + + // 验证配置 + if (!emailConfig.smtpHost || !emailConfig.smtpUser || !emailConfig.smtpPass || !emailConfig.recipients) { + alert('邮件配置不完整,请在"邮件配置"标签页检查配置'); + return; + } + + const sendBtn = document.getElementById('sendEmailBtn'); + const originalText = sendBtn.textContent; + sendBtn.disabled = true; + sendBtn.textContent = '正在发送...'; + + try { + const response = await fetch(`${API_BASE}/send-email`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + emailConfig: emailConfig, + report: currentReport + }) + }); + + const data = await response.json(); + + if (data.success) { + alert('报告已成功发送到邮箱!'); + } else { + alert(`发送失败: ${data.error}`); + } + } catch (error) { + alert(`请求失败: ${error.message}`); + } finally { + sendBtn.disabled = false; + sendBtn.textContent = originalText; + } +} + +// 显示邮件配置状态 +function showEmailStatus(message, type) { + const statusDiv = document.getElementById('emailConfigStatus'); + const bgColors = { + success: '#d4edda', + error: '#f8d7da', + info: '#d1ecf1' + }; + const textColors = { + success: '#155724', + error: '#721c24', + info: '#0c5460' + }; + + statusDiv.innerHTML = ` +
+ ${message} +
+ `; + + // 3秒后自动隐藏成功消息 + if (type === 'success') { + setTimeout(() => { + statusDiv.innerHTML = ''; + }, 3000); + } +} diff --git a/public/index.html b/public/index.html index d11be4a..3ff1b9c 100644 --- a/public/index.html +++ b/public/index.html @@ -3,7 +3,7 @@ - 南京公共工程建设中心 - 公告抓取工具 + 南京公共工程建设中心 - 公告采集工具 + + +
+

南京公共工程建设中心 - 采购公告分析报告

+ +
+

报告摘要

+
+
+
总公告数量
+
${summary.total_count} 条
+
+
+
符合条件
+
${summary.filtered_count} 条
+
+
+
金额阈值
+
${summary.threshold}
+
+
+
总金额
+
${summary.total_amount}
+
+
+ ${summary.date_range ? ` +
+
时间范围
+
+ ${summary.date_range.startDate || '不限'} 至 ${summary.date_range.endDate || '不限'} +
+
+ ` : ''} +
+ +

项目详情

+
+ ${projects.length === 0 ? '

暂无符合条件的项目

' : ''} + ${projects.map((project, index) => ` +
+

${index + 1}. ${project.title}

+
+ 发布日期: ${project.date} + ${project.publish_time ? ` | 发布时间: ${project.publish_time}` : ''} +
+ ${project.budget ? ` +
+ 预算金额: ${project.budget.amount.toFixed(2)} ${project.budget.unit} + ${project.budget.originalUnit !== project.budget.unit ? ` (原始: ${project.budget.originalUnit})` : ''} +
+ ` : ''} +
+ ${project.url} +
+
+ `).join('')} +
+ + +
+ + + `; +} diff --git a/src/server.js b/src/server.js index cd74722..680e249 100644 --- a/src/server.js +++ b/src/server.js @@ -3,6 +3,7 @@ import cors from 'cors'; import axios from 'axios'; import * as cheerio from 'cheerio'; import iconv from 'iconv-lite'; +import { sendReportEmail } from './emailService.js'; const app = express(); const PORT = 3000; @@ -33,24 +34,24 @@ function isDateInRange(dateStr, startDate, endDate) { return true; } -// 按时间范围抓取多页列表 +// 按时间范围采集多页列表 async function fetchListByDateRange(startDate, endDate, maxPages = 23) { const allItems = []; let shouldContinue = true; let pageIndex = 0; - console.log(`开始按时间范围抓取: ${startDate || '不限'} 至 ${endDate || '不限'}`); + console.log(`开始按时间范围采集: ${startDate || '不限'} 至 ${endDate || '不限'}`); while (shouldContinue && pageIndex < maxPages) { const pageUrl = getPageUrl(pageIndex); - console.log(`正在抓取第 ${pageIndex + 1} 页: ${pageUrl}`); + console.log(`正在采集第 ${pageIndex + 1} 页: ${pageUrl}`); try { const html = await fetchHtml(pageUrl); const items = parseList(html); if (items.length === 0) { - console.log(`第 ${pageIndex + 1} 页没有数据,停止抓取`); + console.log(`第 ${pageIndex + 1} 页没有数据,停止采集`); break; } @@ -70,7 +71,7 @@ async function fetchListByDateRange(startDate, endDate, maxPages = 23) { } if (allItemsBeforeRange && startDate) { - console.log(`第 ${pageIndex + 1} 页所有项目都早于起始日期,停止抓取`); + console.log(`第 ${pageIndex + 1} 页所有项目都早于起始日期,停止采集`); shouldContinue = false; } @@ -82,12 +83,12 @@ async function fetchListByDateRange(startDate, endDate, maxPages = 23) { await new Promise(resolve => setTimeout(resolve, 500)); } } catch (err) { - console.error(`抓取第 ${pageIndex + 1} 页失败: ${err.message}`); + console.error(`采集第 ${pageIndex + 1} 页失败: ${err.message}`); break; } } - console.log(`总共抓取了 ${pageIndex} 页,找到 ${allItems.length} 条符合条件的公告`); + console.log(`总共采集了 ${pageIndex} 页,找到 ${allItems.length} 条符合条件的公告`); return allItems; } @@ -207,6 +208,10 @@ function parseDetail(html) { } function extractBudget(content) { + // 预处理内容:去除数字之间的换行符和空白字符 + // 这样可以匹配被换行符分隔的数字,例如 "1\n1\n0\n9\n0\n0" -> "110900" + let cleanedContent = content.replace(/(\d)\s*[\n\r]\s*(?=\d)/g, '$1'); + // 直接定义金额匹配模式(从高优先级到低优先级) const patterns = [ // 优先级1: 带货币符号的万元 @@ -230,7 +235,7 @@ function extractBudget(content) { // 遍历所有模式,找到优先级最高的匹配 for (const pattern of patterns) { - const match = content.match(pattern.regex); + const match = cleanedContent.match(pattern.regex); if (match && pattern.priority < bestPriority) { // 清理数字中的逗号并转换 const numberStr = match[1].replace(/[,,]/g, ''); @@ -329,21 +334,21 @@ app.post('/api/report', async (req, res) => { const { limit = 15, threshold = 50, url } = req.body; const targetUrl = url && url.trim() !== '' ? url : BASE_URL; - // 按需抓取多页以获取足够的数据 + // 按需采集多页以获取足够的数据 const items = []; let pageIndex = 0; const maxPagesToFetch = Math.ceil(limit / 10) + 1; // 假设每页约10条,多抓一页保险 while (items.length < limit && pageIndex < maxPagesToFetch) { const pageUrl = getPageUrl(pageIndex, targetUrl); - console.log(`正在抓取第 ${pageIndex + 1} 页: ${pageUrl}`); + console.log(`正在采集第 ${pageIndex + 1} 页: ${pageUrl}`); try { const html = await fetchHtml(pageUrl); const pageItems = parseList(html); if (pageItems.length === 0) { - console.log(`第 ${pageIndex + 1} 页没有数据,停止抓取`); + console.log(`第 ${pageIndex + 1} 页没有数据,停止采集`); break; } @@ -354,7 +359,7 @@ app.post('/api/report', async (req, res) => { await new Promise(resolve => setTimeout(resolve, 500)); } } catch (err) { - console.error(`抓取第 ${pageIndex + 1} 页失败: ${err.message}`); + console.error(`采集第 ${pageIndex + 1} 页失败: ${err.message}`); break; } } @@ -417,7 +422,7 @@ app.post('/api/report-daterange', async (req, res) => { try { const { startDate, endDate, threshold = 50, maxPages = 23 } = req.body; - // 按时间范围抓取列表 + // 按时间范围采集列表 const items = await fetchListByDateRange(startDate, endDate, maxPages); if (items.length === 0) { @@ -437,7 +442,7 @@ app.post('/api/report-daterange', async (req, res) => { }); } - // 抓取详情 + // 采集详情 const results = []; for (const item of items) { try { @@ -491,6 +496,50 @@ app.post('/api/report-daterange', async (req, res) => { } }); +// 发送报告邮件 +app.post('/api/send-email', async (req, res) => { + try { + const { emailConfig, report } = req.body; + + // 验证必需的配置参数 + if (!emailConfig || !emailConfig.smtpHost || !emailConfig.smtpUser || !emailConfig.smtpPass) { + return res.status(400).json({ + success: false, + error: '邮件配置不完整,请填写SMTP服务器、用户名和密码', + }); + } + + if (!emailConfig.recipients || emailConfig.recipients.trim() === '') { + return res.status(400).json({ + success: false, + error: '请至少指定一个收件人', + }); + } + + if (!report) { + return res.status(400).json({ + success: false, + error: '没有可发送的报告数据', + }); + } + + // 发送邮件 + const result = await sendReportEmail(emailConfig, report); + + res.json({ + success: true, + message: '邮件发送成功', + messageId: result.messageId, + }); + } catch (error) { + console.error('发送邮件API错误:', error); + res.status(500).json({ + success: false, + error: error.message, + }); + } +}); + app.listen(PORT, () => { console.log(`Server running at http://localhost:${PORT}`); });