From ad659c4ff0e5f42a1f0cb9b34dd7be6bf4258f75 Mon Sep 17 00:00:00 2001 From: zhaojunlong <5482498@qq.com> Date: Fri, 6 Mar 2026 15:37:56 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E4=BD=BF=E7=94=A8firecrawl=20=E5=AE=9E?= =?UTF-8?q?=E7=8E=B0=E5=85=AC=E5=91=8A=E6=8A=93=E5=8F=96=E4=B8=8E=E5=88=86?= =?UTF-8?q?=E6=9E=90=E5=B7=A5=E5=85=B7=E7=9A=84=E7=BD=91=E9=A1=B5=E7=95=8C?= =?UTF-8?q?=E9=9D=A2=EF=BC=8C=E5=8C=85=E6=8B=AC=E6=8A=A5=E5=91=8A=E7=94=9F?= =?UTF-8?q?=E6=88=90=E3=80=81=E5=AF=BC=E5=87=BA=E5=92=8C=E9=82=AE=E4=BB=B6?= =?UTF-8?q?=E5=8F=91=E9=80=81=E5=8A=9F=E8=83=BD=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .env.example | 8 +- config.json | 28 +- package-lock.json | 64 ++- package.json | 9 +- public/app.js | 87 +-- public/index.html | 881 +++++++++++++++++++++++++----- public/results.html | 1273 +++++++++++++++++++++++++++++++++++++++++++ results.json | 432 +++++++++++++++ src/emailService.js | 163 ++++++ src/scheduler.js | 669 ++++++----------------- src/server.js | 1066 +++++++++++------------------------- 11 files changed, 3190 insertions(+), 1490 deletions(-) create mode 100644 public/results.html create mode 100644 results.json diff --git a/.env.example b/.env.example index 55192bd..1251b42 100644 --- a/.env.example +++ b/.env.example @@ -1,11 +1,15 @@ # 服务器端口配置 PORT=5000 +# Firecrawl API Key(在 https://www.firecrawl.dev/app/api-keys 获取) +FIRECRAWL_API_KEY=fc-your-api-key-here + # 环境说明: # - 开发环境:通常使用 5000 # - 生产环境:可以使用 80、8080 等 # # 使用方法: # 1. 复制此文件为 .env -# 2. 修改端口号 -# 3. 启动服务时会自动读取 +# 2. 填写 FIRECRAWL_API_KEY +# 3. 修改端口号(可选) +# 4. 启动服务时会自动读取 diff --git a/config.json b/config.json index 903e816..9661468 100644 --- a/config.json +++ b/config.json @@ -5,7 +5,7 @@ "winningThreshold": 0, "bidThreshold": 0, "description": "每天9点采集当日项目", - "timeRange": "thisMonth" + "timeRange": "today" }, "email": { "smtpHost": "smtp.qq.com", @@ -13,5 +13,29 @@ "smtpUser": "1076597680@qq.com", "smtpPass": "nfrjdiraqddsjeeh", "recipients": "5482498@qq.com" - } + }, + "scrapers": [ + { + "id": "scraper-1772762354799", + "city": "无锡市", + "url": "https://ggzyjy.wuxi.gov.cn/wxsggzyjyzxzl/jyxx/slgc/index.shtml", + "section": "水利工程", + "subsection": "", + "type": "招标公告", + "prompt": "提取页面上今天的招标公告信息,包括:标题、项目金额(可能为合同预估价/最高投标限价等等)、发布日期(YYYY-MM-DD格式)、详情页完整URL", + "enabled": true, + "model": "spark-1-mini" + }, + { + "id": "scraper-1772762494299", + "city": "南京市", + "url": "https://njggzy.nanjing.gov.cn/njweb/fjsz/buildService1.html", + "section": "房建市政", + "subsection": "工程类", + "type": "招标公告", + "prompt": "提取页面上今日的招标公告信息,包括:标题、项目金额(可能为合同预估价/最高投标限价等等)、发布日期(YYYY-MM-DD格式)、详情页完整URL", + "enabled": false, + "model": "spark-1-mini" + } + ] } \ No newline at end of file diff --git a/package-lock.json b/package-lock.json index 2f6840a..a019c4d 100644 --- a/package-lock.json +++ b/package-lock.json @@ -8,14 +8,38 @@ "name": "njggzy-scraper", "version": "2.0.0", "dependencies": { - "axios": "^1.6.8", + "@mendable/firecrawl-js": "^4.15.2", "cheerio": "^1.0.0-rc.12", "cors": "^2.8.5", "dotenv": "^17.2.3", "express": "^5.2.1", - "iconv-lite": "^0.6.3", "node-cron": "^4.2.1", - "nodemailer": "^7.0.11" + "nodemailer": "^7.0.11", + "zod": "^4.3.6" + } + }, + "node_modules/@mendable/firecrawl-js": { + "version": "4.15.2", + "resolved": "https://registry.npmmirror.com/@mendable/firecrawl-js/-/firecrawl-js-4.15.2.tgz", + "integrity": "sha512-J+lfnJpd00irDhy5ZJE58lsdqbc1fC1d7X6/UyF4VFASEGy1GDpR0FuVweasEpFfOhEGS5DZ+dq8Ui21zIFrOw==", + "license": "MIT", + "dependencies": { + "axios": "^1.13.5", + "typescript-event-target": "^1.1.1", + "zod": "^3.23.8", + "zod-to-json-schema": "^3.23.0" + }, + "engines": { + "node": ">=22.0.0" + } + }, + "node_modules/@mendable/firecrawl-js/node_modules/zod": { + "version": "3.25.76", + "resolved": "https://registry.npmmirror.com/zod/-/zod-3.25.76.tgz", + "integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/colinhacks" } }, "node_modules/accepts": { @@ -63,13 +87,13 @@ "license": "MIT" }, "node_modules/axios": { - "version": "1.13.2", - "resolved": "https://registry.npmmirror.com/axios/-/axios-1.13.2.tgz", - "integrity": "sha512-VPk9ebNqPcy5lRGuSlKx752IlDatOjT9paPlm8A7yOuW2Fbvp4X3JznJtT4f0GzGLLiWE9W8onz51SqLYwzGaA==", + "version": "1.13.6", + "resolved": "https://registry.npmmirror.com/axios/-/axios-1.13.6.tgz", + "integrity": "sha512-ChTCHMouEe2kn713WHbQGcuYrr6fXTBiu460OTwWrWob16g1bXn4vtz07Ope7ewMozJAnEquLk5lWQWtBig9DQ==", "license": "MIT", "dependencies": { - "follow-redirects": "^1.15.6", - "form-data": "^4.0.4", + "follow-redirects": "^1.15.11", + "form-data": "^4.0.5", "proxy-from-env": "^1.1.0" } }, @@ -1331,6 +1355,12 @@ "url": "https://opencollective.com/express" } }, + "node_modules/typescript-event-target": { + "version": "1.1.2", + "resolved": "https://registry.npmmirror.com/typescript-event-target/-/typescript-event-target-1.1.2.tgz", + "integrity": "sha512-TvkrTUpv7gCPlcnSoEwUVUBwsdheKm+HF5u2tPAKubkIGMfovdSizCTaZRY/NhR8+Ijy8iZZUapbVQAsNrkFrw==", + "license": "MIT" + }, "node_modules/undici": { "version": "7.16.0", "resolved": "https://registry.npmmirror.com/undici/-/undici-7.16.0.tgz", @@ -1384,6 +1414,24 @@ "resolved": "https://registry.npmmirror.com/wrappy/-/wrappy-1.0.2.tgz", "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==", "license": "ISC" + }, + "node_modules/zod": { + "version": "4.3.6", + "resolved": "https://registry.npmmirror.com/zod/-/zod-4.3.6.tgz", + "integrity": "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/colinhacks" + } + }, + "node_modules/zod-to-json-schema": { + "version": "3.25.1", + "resolved": "https://registry.npmmirror.com/zod-to-json-schema/-/zod-to-json-schema-3.25.1.tgz", + "integrity": "sha512-pM/SU9d3YAggzi6MtR4h7ruuQlqKtad8e9S0fmxcMi+ueAK5Korys/aWcV9LIIHTVbj01NdzxcnXSN+O74ZIVA==", + "license": "ISC", + "peerDependencies": { + "zod": "^3.25 || ^4" + } } } } diff --git a/package.json b/package.json index 1a7ef37..c218076 100644 --- a/package.json +++ b/package.json @@ -8,13 +8,12 @@ "start": "node src/server.js" }, "dependencies": { - "axios": "^1.6.8", - "cheerio": "^1.0.0-rc.12", + "@mendable/firecrawl-js": "latest", "cors": "^2.8.5", "dotenv": "^17.2.3", "express": "^5.2.1", - "iconv-lite": "^0.6.3", "node-cron": "^4.2.1", - "nodemailer": "^7.0.11" + "nodemailer": "^7.0.11", + "zod": "^3.24.2" } -} +} \ No newline at end of file diff --git a/public/app.js b/public/app.js index 7e63407..dc5d909 100644 --- a/public/app.js +++ b/public/app.js @@ -687,51 +687,34 @@ function cronToFriendlyText(cronTime) { // 加载定时任务配置 async function loadSchedulerConfig() { try { - // 从服务器获取配置 const response = await fetch(`${API_BASE}/config`); const data = await response.json(); if (data.success && data.data) { const config = data.data; - // 填充表单 if (config.scheduler) { document.getElementById('schedulerEnabled').checked = config.scheduler.enabled || false; const cronTime = config.scheduler.cronTime || '0 9 * * *'; document.getElementById('schedulerCronInput').value = cronTime; - document.getElementById('schedulerWinningThresholdInput').value = config.scheduler.winningThreshold !== undefined ? config.scheduler.winningThreshold : 10000; - document.getElementById('schedulerBidThresholdInput').value = config.scheduler.bidThreshold !== undefined ? config.scheduler.bidThreshold : 0; + document.getElementById('schedulerThresholdInput').value = config.scheduler.threshold ?? 0; document.getElementById('schedulerDescription').value = config.scheduler.description || ''; - // 时间段配置 - document.getElementById('schedulerTimeRange').value = config.scheduler.timeRange || 'thisMonth'; - // 反向映射Cron表达式到预设选择器 const presetSelector = document.getElementById('schedulerCronPreset'); const customGroup = document.getElementById('customCronGroup'); - // 预设值列表 const presets = [ - '0 9 * * *', - '0 6 * * *', - '0 12 * * *', - '0 18 * * *', - '0 9,18 * * *', - '0 */6 * * *', - '0 */12 * * *', - '0 9 * * 1', - '0 9 1 * *' + '0 9 * * *', '0 6 * * *', '0 12 * * *', '0 18 * * *', + '0 9,18 * * *', '0 */6 * * *', '0 */12 * * *', '0 9 * * 1', '0 9 1 * *' ]; - // 检查是否匹配预设值 if (presets.includes(cronTime)) { presetSelector.value = cronTime; customGroup.style.display = 'none'; } else { - // 自定义时间 - 尝试解析为 "分 时 * * *" 格式 presetSelector.value = 'custom'; customGroup.style.display = 'block'; - const cronParts = cronTime.split(/\s+/); if (cronParts.length >= 2) { document.getElementById('customMinute').value = cronParts[0]; @@ -740,7 +723,6 @@ async function loadSchedulerConfig() { } } - // 更新状态显示 await updateSchedulerStatus(); } } catch (error) { @@ -776,7 +758,7 @@ function updateCustomCron() { cronInput.value = `${minute} ${hour} * * *`; } -document.addEventListener('DOMContentLoaded', function() { +document.addEventListener('DOMContentLoaded', function () { // 并行加载配置,提高加载速度 Promise.all([ loadEmailConfig().catch(err => console.error('加载邮件配置失败:', err)), @@ -813,20 +795,12 @@ async function updateSchedulerStatus() { // 更新执行计划 if (status.config) { document.getElementById('schedulerCronTime').textContent = cronToFriendlyText(status.config.cronTime); - const winningThreshold = status.config.winningThreshold; - if (winningThreshold === 0) { - document.getElementById('schedulerWinningThreshold').textContent = '不筛选'; - } else { - const winningBillion = (winningThreshold / 10000).toFixed(1); - document.getElementById('schedulerWinningThreshold').textContent = `${winningThreshold}万元 (${winningBillion}亿)`; - } - const bidThreshold = status.config.bidThreshold; - if (bidThreshold === 0) { - document.getElementById('schedulerBidThreshold').textContent = '不筛选'; - } else { - const bidBillion = (bidThreshold / 10000).toFixed(1); - document.getElementById('schedulerBidThreshold').textContent = `${bidThreshold}万元 (${bidBillion}亿)`; - } + } + + // 更新已启用来源数 + const enabledCountEl = document.getElementById('schedulerEnabledCount'); + if (enabledCountEl) { + enabledCountEl.textContent = `${status.enabledScrapers ?? '-'} 个`; } } } catch (error) { @@ -839,10 +813,8 @@ async function saveSchedulerConfig() { const schedulerConfig = { enabled: document.getElementById('schedulerEnabled').checked, cronTime: document.getElementById('schedulerCronInput').value, - winningThreshold: parseInt(document.getElementById('schedulerWinningThresholdInput').value), - bidThreshold: parseInt(document.getElementById('schedulerBidThresholdInput').value), + threshold: parseInt(document.getElementById('schedulerThresholdInput').value) || 0, description: document.getElementById('schedulerDescription').value, - timeRange: document.getElementById('schedulerTimeRange').value }; // 验证Cron表达式格式(简单验证) @@ -852,36 +824,16 @@ async function saveSchedulerConfig() { return; } - // 从localStorage获取邮件配置 - const emailConfigStr = localStorage.getItem('emailConfig'); - let emailConfig = {}; - - if (emailConfigStr) { - try { - emailConfig = JSON.parse(emailConfigStr); - } catch (e) { - console.error('解析邮件配置失败:', e); - } - } - - // 如果邮件配置为空,提示用户 - if (!emailConfig.smtpHost || !emailConfig.smtpUser) { - if (confirm('检测到邮件配置未完成,定时任务需要邮件配置才能发送报告。\n\n是否继续保存定时任务配置(不保存邮件配置)?')) { - // 继续保存,但不包含邮件配置 - } else { - return; - } - } - - // 构建完整配置对象 - const fullConfig = { - scheduler: schedulerConfig, - email: emailConfig - }; - showSchedulerStatus('正在保存配置...', 'info'); try { + // 先获取当前服务器配置(保留 email/scrapers 等字段) + const getResponse = await fetch(`${API_BASE}/config`); + const getData = await getResponse.json(); + const currentCfg = (getData.success && getData.data) ? getData.data : {}; + + const fullConfig = { ...currentCfg, scheduler: schedulerConfig }; + const response = await fetch(`${API_BASE}/config`, { method: 'POST', headers: { 'Content-Type': 'application/json' }, @@ -892,7 +844,6 @@ async function saveSchedulerConfig() { if (data.success) { showSchedulerStatus('配置已保存,定时任务已重新加载!', 'success'); - // 刷新状态显示 await updateSchedulerStatus(); } else { showSchedulerStatus(`保存失败: ${data.error}`, 'error'); @@ -1255,6 +1206,6 @@ async function sendCombinedReportByEmail() { } // 页面加载时初始化报告日期 -document.addEventListener('DOMContentLoaded', function() { +document.addEventListener('DOMContentLoaded', function () { initReportDates(); }); diff --git a/public/index.html b/public/index.html index 51953c0..487f1be 100644 --- a/public/index.html +++ b/public/index.html @@ -1,5 +1,6 @@ + @@ -95,7 +96,8 @@ color: #333; } - .form-group input, .form-group select { + .form-group input, + .form-group select { width: 100%; padding: 12px; border: 2px solid #e0e0e0; @@ -104,7 +106,8 @@ transition: border 0.3s; } - .form-group input:focus, .form-group select:focus { + .form-group input:focus, + .form-group select:focus { outline: none; border-color: #667eea; } @@ -153,8 +156,13 @@ } @keyframes spin { - 0% { transform: rotate(0deg); } - 100% { transform: rotate(360deg); } + 0% { + transform: rotate(0deg); + } + + 100% { + transform: rotate(360deg); + } } .results { @@ -323,8 +331,375 @@ font-size: 14px; margin: 0 10px; } + + /* ===== 抓取来源配置页样式 ===== */ + .scrapers-toolbar { + display: flex; + align-items: center; + justify-content: space-between; + margin-bottom: 20px; + flex-wrap: wrap; + gap: 12px; + } + + .scrapers-toolbar h2 { + margin: 0; + color: #667eea; + font-size: 20px; + } + + .btn-add { + background: linear-gradient(135deg, #11998e 0%, #38ef7d 100%); + color: white; + border: none; + padding: 10px 22px; + border-radius: 8px; + font-size: 15px; + cursor: pointer; + font-weight: 600; + display: flex; + align-items: center; + gap: 6px; + transition: all 0.2s; + box-shadow: 0 2px 8px rgba(17, 153, 142, 0.3); + } + + .btn-add:hover { + transform: translateY(-1px); + box-shadow: 0 4px 14px rgba(17, 153, 142, 0.4); + } + + .scrapers-table-wrap { + overflow-x: auto; + border-radius: 12px; + border: 1px solid #e8eaf0; + box-shadow: 0 2px 12px rgba(102, 126, 234, 0.06); + } + + .scrapers-table { + width: 100%; + border-collapse: collapse; + font-size: 14px; + min-width: 800px; + } + + .scrapers-table thead tr { + background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); + color: white; + } + + .scrapers-table th { + padding: 14px 14px; + text-align: left; + font-weight: 600; + white-space: nowrap; + } + + .scrapers-table tbody tr { + border-bottom: 1px solid #f0f0f5; + transition: background 0.15s; + } + + .scrapers-table tbody tr:last-child { + border-bottom: none; + } + + .scrapers-table tbody tr:hover { + background: #f5f7ff; + } + + .scrapers-table td { + padding: 12px 14px; + vertical-align: top; + color: #333; + } + + .scrapers-table td.prompt-cell { + max-width: 220px; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; + color: #666; + font-size: 13px; + } + + .tag { + display: inline-block; + padding: 2px 10px; + border-radius: 20px; + font-size: 12px; + font-weight: 600; + white-space: nowrap; + } + + .tag-type { + background: #e8f4fd; + color: #1a73c8; + } + + .tag-enabled { + background: #e4f9ee; + color: #1a8a4a; + } + + .tag-disabled { + background: #feeaea; + color: #c0392b; + } + + .url-cell a { + color: #667eea; + text-decoration: none; + font-size: 12px; + word-break: break-all; + } + + .url-cell a:hover { + text-decoration: underline; + } + + .action-btns { + display: flex; + gap: 6px; + flex-wrap: wrap; + } + + .btn-sm { + padding: 5px 12px; + border-radius: 6px; + border: none; + font-size: 12px; + font-weight: 600; + cursor: pointer; + transition: all 0.15s; + white-space: nowrap; + } + + .btn-edit { + background: #fff3cd; + color: #856404; + } + + .btn-edit:hover { + background: #ffc107; + color: #fff; + } + + .btn-delete { + background: #fdeaea; + color: #c0392b; + } + + .btn-delete:hover { + background: #e74c3c; + color: #fff; + } + + .btn-run { + background: #e8f4fd; + color: #1a73c8; + } + + .btn-run:hover { + background: #667eea; + color: #fff; + } + + .btn-toggle-on { + background: #e4f9ee; + color: #1a8a4a; + } + + .btn-toggle-on:hover { + background: #27ae60; + color: #fff; + } + + .btn-toggle-off { + background: #feeaea; + color: #c0392b; + } + + .btn-toggle-off:hover { + background: #e74c3c; + color: #fff; + } + + .empty-state { + text-align: center; + padding: 60px 20px; + color: #aaa; + } + + .empty-state svg { + margin-bottom: 12px; + opacity: 0.4; + } + + /* 弹窗 */ + .modal-overlay { + display: none; + position: fixed; + inset: 0; + background: rgba(0, 0, 0, 0.45); + z-index: 1000; + align-items: center; + justify-content: center; + } + + .modal-overlay.show { + display: flex; + } + + .modal-box { + background: white; + border-radius: 16px; + padding: 32px; + width: 600px; + max-width: 95vw; + max-height: 90vh; + overflow-y: auto; + box-shadow: 0 20px 60px rgba(0, 0, 0, 0.3); + animation: modalIn 0.2s ease; + } + + @keyframes modalIn { + from { + opacity: 0; + transform: scale(0.95) translateY(-10px); + } + + to { + opacity: 1; + transform: scale(1) translateY(0); + } + } + + .modal-header { + display: flex; + align-items: center; + justify-content: space-between; + margin-bottom: 24px; + } + + .modal-header h3 { + margin: 0; + color: #333; + font-size: 18px; + } + + .modal-close { + background: none; + border: none; + font-size: 24px; + cursor: pointer; + color: #999; + line-height: 1; + padding: 0; + } + + .modal-close:hover { + color: #333; + } + + .modal-form .form-row { + display: grid; + grid-template-columns: 1fr 1fr; + gap: 14px; + } + + .modal-form .form-group { + margin-bottom: 16px; + } + + .modal-form .form-group label { + display: block; + font-size: 13px; + font-weight: 600; + color: #555; + margin-bottom: 6px; + } + + .modal-form .form-group input, + .modal-form .form-group select, + .modal-form .form-group textarea { + width: 100%; + padding: 10px 12px; + border: 1.5px solid #e0e0e0; + border-radius: 8px; + font-size: 14px; + font-family: inherit; + transition: border 0.2s; + box-sizing: border-box; + } + + .modal-form .form-group input:focus, + .modal-form .form-group select:focus, + .modal-form .form-group textarea:focus { + outline: none; + border-color: #667eea; + box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.12); + } + + .modal-form .form-group textarea { + resize: vertical; + min-height: 90px; + } + + .modal-footer { + display: flex; + justify-content: flex-end; + gap: 10px; + margin-top: 20px; + } + + .btn-cancel { + background: #f0f0f0; + color: #555; + border: none; + padding: 10px 24px; + border-radius: 8px; + font-size: 14px; + cursor: pointer; + font-weight: 600; + } + + .btn-cancel:hover { + background: #e0e0e0; + } + + .btn-save { + background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); + color: white; + border: none; + padding: 10px 28px; + border-radius: 8px; + font-size: 14px; + cursor: pointer; + font-weight: 600; + transition: all 0.2s; + } + + .btn-save:hover { + box-shadow: 0 4px 14px rgba(102, 126, 234, 0.4); + } + + .run-result { + margin-top: 16px; + padding: 14px; + background: #f7f8ff; + border-radius: 8px; + border: 1px solid #e0e5ff; + font-size: 13px; + max-height: 300px; + overflow-y: auto; + white-space: pre-wrap; + word-break: break-all; + color: #333; + } +
@@ -333,124 +708,37 @@
- - - - + + + 📊 抓取结果
- -
-
- - -
- - -
-
-

正在采集...

-
- -
- - -
- - -
-

交通水务招标公告

-

浏览招标公告列表

- -
- - -
- - -
-
-

正在获取招标公告列表...

-
- -
- - -
- - -
-

生成综合报告

-

同时采集中标公示和招标公告,生成综合报告

- -
- - -
-
- - -
-
- - -
-
- - -
-
- - -
- - - - -
-
-

正在生成报告...

-
- -
-
- -
+

定时任务配置

配置定时任务自动采集大于指定金额的项目并发送邮件报告

-
+

任务状态

运行状态
-
加载中...
+
加载中...
执行时间
-
-
+
- +
-
中标阈值
-
-
-
-
-
招标阈值
-
-
+
已启用来源
+
-
@@ -500,52 +788,35 @@ -
- - - - 今日:今天 | 本周:本周一至今 | 本月:本月1日至今 - -
- - + + - 10亿 = 100000万元 | 5亿 = 50000万元 | 1亿 = 10000万元 - -
- -
- - - - 设为0时不筛选金额,只要有合同估算价的招标公告都会采集 + 10亿 = 100000万元 | 1亿 = 10000万元 | 0 = 不筛选,全部显示
- +
- +
-
+

使用说明

    -
  • 数据来源: 南京公共资源交易平台 - 交通水务中标公示 + 招标公告
  • -
  • 中标采集: 标段编号、项目名称、标段名称、中标价格、中标日期(按中标阈值筛选)
  • -
  • 招标采集: 项目名称、标段编码、招标人、合同估算价、工期(按招标阈值筛选,0表示不筛选)
  • -
  • 邮件发送: 自动将中标+招标综合报告生成HTML邮件并发送到配置的邮箱
  • +
  • 数据来源: 运行「抓取来源」页中所有已启用的抓取配置
  • +
  • 自动抓取: 按计划时间自动逐个运行所有启用的抓取来源,结果保存到「抓取结果」页
  • +
  • 邮件通知: 抓取完成后自动将结果发送到配置的邮箱(需先完成邮件配置)
  • +
  • 提示: 请前往「抓取来源」页配置并启用需要定时抓取的来源
@@ -581,11 +852,13 @@
- +
-
+

常用邮箱配置参考

  • QQ邮箱: smtp.qq.com, 端口 587 或 465, 需要使用授权码
  • @@ -599,9 +872,345 @@
+ + +
+
+

抓取来源配置

+
+ + + + + + + 查看结果 + + +
+
+ +

通过配置 URL 和提示词,使用 Firecrawl Agent + 抓取任意网页数据。结果会自动保存,可在「抓取结果」页查看历史。

+ +
+ + + + + + + + + + + + + + + + + + + +
城市板块子板块类型链接地址提示词AI模型状态操作
+ + + + +
暂无配置,点击「新增来源」添加抓取任务
+
+
+ + + + + + +
+
+
+ + + + - + + \ No newline at end of file diff --git a/public/results.html b/public/results.html new file mode 100644 index 0000000..5ff32d2 --- /dev/null +++ b/public/results.html @@ -0,0 +1,1273 @@ + + + + + + + 抓取结果查看 - 公告采集工具 + + + + + +
+ + + 公告采集工具 + + +
+ +
+ + + + +
+
+
总记录数
+
-
+
条抓取记录
+
+
+
成功条数
+
-
+
已成功抓取
+
+
+
失败条数
+
-
+
抓取出错
+
+
+
来源城市
+
-
+
个不同城市
+
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + + +
+
+ + +
+
+

加载中...

+
+ + +
+ + + +
+ + +
+ + +
+
+
⚠️ 确认清空
+
确定要清空所有抓取结果吗?此操作不可撤销。
+
+ + +
+
+
+ + +
+
+
+

抓取结果详情

+ +
+
+
+
+
+
+ + + + + \ No newline at end of file diff --git a/results.json b/results.json new file mode 100644 index 0000000..03b076f --- /dev/null +++ b/results.json @@ -0,0 +1,432 @@ +[ + { + "scraperId": "scraper-1772762354799", + "city": "无锡市", + "section": "水利工程", + "subsection": "", + "type": "招标公告", + "url": "https://ggzyjy.wuxi.gov.cn/wxsggzyjyzxzl/jyxx/slgc/index.shtml", + "scrapedAt": "2026-03-06T06:57:46.881Z", + "data": { + "result": [ + { + "title": "高新区三级防控系统工程周三房浜闸站工程施工招标公告", + "amount": "5,923,797元", + "date": "2026-03-05", + "url": "http://ggzyjy.wuxi.gov.cn/doc/2026/03/05/4741071.shtml" + } + ], + "total": 1 + }, + "id": "result-1772780266881-odaof" + }, + { + "scraperId": "scraper-1772762354799", + "city": "无锡市", + "section": "水利工程", + "subsection": "", + "type": "招标公告", + "url": "https://ggzyjy.wuxi.gov.cn/wxsggzyjyzxzl/jyxx/slgc/index.shtml", + "scrapedAt": "2026-03-06T06:42:40.619Z", + "data": { + "result": [ + { + "title": "高新区三级防控系统工程周三房浜闸站工程施工招标公告", + "amount": "5923797元", + "date": "2026-03-05", + "url": "https://ggzyjy.wuxi.gov.cn/doc/2026/03/05/4741071.shtml" + } + ], + "total": 1 + }, + "id": "result-1772779360620-xr7ue" + }, + { + "scraperId": "scraper-1772762354799", + "city": "无锡市", + "section": "水利工程", + "subsection": "", + "type": "招标公告", + "url": "https://ggzyjy.wuxi.gov.cn/wxsggzyjyzxzl/jyxx/slgc/index.shtml", + "scrapedAt": "2026-03-06T04:02:43.530Z", + "data": { + "items": [ + { + "title": "高新区三级防控系统工程周三房浜闸站工程施工招标公告", + "amount": "5923797元", + "date": "2026-03-05", + "url": "https://ggzyjy.wuxi.gov.cn/doc/2026/03/05/4741071.shtml" + } + ], + "total": 1 + }, + "id": "result-1772769763530-3axw2" + }, + { + "scraperId": "scraper-1772762354799", + "city": "无锡市", + "section": "水利工程", + "subsection": "", + "type": "招标公告", + "url": "https://ggzyjy.wuxi.gov.cn/wxsggzyjyzxzl/jyxx/slgc/index.shtml", + "scrapedAt": "2026-03-06T02:51:39.452Z", + "error": "Insufficient credits to perform this request. For more credits, you can upgrade your plan at https://firecrawl.dev/pricing or try changing the request limit to a lower value.", + "data": null, + "id": "result-1772765499452-ynhn0" + }, + { + "scraperId": "scraper-1772762494299", + "city": "南京市", + "section": "房建市政", + "subsection": "工程类", + "type": "招标公告", + "url": "https://njggzy.nanjing.gov.cn/njweb/fjsz/buildService1.html", + "scrapedAt": "2026-03-06T02:32:03.818Z", + "data": { + "success": true, + "status": "completed", + "data": { + "target_date": "2026-03-06", + "notice_count": 0, + "notices": [], + "message": "截至当前时间(2026-03-06 02:19),网站尚未发布今日(2026-03-06)的招标公告。最新公告日期为2026-03-05。", + "recent_notices_fallback": [ + { + "title": "麒麟科创园具身智能训练场装修项目", + "date": "2026-03-05", + "url": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001002/20260305/a20ee94f-b76e-4f88-b8df-2847c2f35ce1.html", + "amount": "5660000.00" + }, + { + "title": "站东13号(MCd080-07-08)地块10kV电力杆线迁改工程", + "date": "2026-03-05", + "url": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001002/20260305/f0b99840-e8de-4a08-b2ba-3e57a347864c.html", + "amount": "9543100.00" + }, + { + "title": "【澄清公告】螺丝桥大街北延(月安街至应天大街段)道路工程", + "date": "2026-03-05", + "url": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001002/20260305/1b3da624-fe86-4755-a268-a1967cd9d489.html", + "amount": "900万元" + }, + { + "title": "建邺路150-164号等9个地块城中村改造项目", + "date": "2026-03-05", + "url": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001002/20260305/6f4fcf2f-d198-4814-acd8-9817ef559a0c.html", + "amount": "1,900,000.00" + }, + { + "title": "【澄清公告】南京市溧水区柘塘街道供水管网及配套设施提升改造工程", + "date": "2026-03-05", + "url": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001002/20260305/11ec2263-4ed1-4115-bdd1-0a6dcbf1d6c1.html", + "amount": "11320.01万元" + } + ] + }, + "model": "spark-1-mini", + "expiresAt": "2026-03-07T02:32:00.316Z", + "creditsUsed": 0 + }, + "id": "result-1772764323818-mj8km" + }, + { + "scraperId": "scraper-1772762354799", + "city": "无锡市", + "section": "水利工程", + "subsection": "", + "type": "招标公告", + "url": "https://ggzyjy.wuxi.gov.cn/wxsggzyjyzxzl/jyxx/slgc/zbgg/index.shtml", + "scrapedAt": "2026-03-06T02:19:27.580Z", + "data": { + "success": true, + "status": "completed", + "data": [ + { + "title": "高新区三级防控系统工程周三房浜闸站工程施工招标公告", + "project_amount": "5,923,797元 (最高投标限价)", + "publish_date": "2026-03-05", + "detail_url": "http://ggzyjy.wuxi.gov.cn/doc/2026/03/05/4741071.shtml" + } + ], + "model": "spark-1-mini", + "expiresAt": "2026-03-07T02:19:24.631Z", + "creditsUsed": 0 + }, + "id": "result-1772763567581-ahz62" + }, + { + "scraperId": "scraper-1772699302521", + "city": "无锡市", + "section": "", + "subsection": "", + "type": "招标公告", + "url": "https://ggzyjy.wuxi.gov.cn/wxsggzyjyzxzl/jyxx/slgc/index.shtml", + "scrapedAt": "2026-03-05T10:05:46.148Z", + "data": { + "success": true, + "status": "completed", + "data": { + "announcements": [ + { + "title": "高新区三级防控系统工程周三房浜闸站工程施工招标公告", + "project_amount": "最高投标限价:5923797元", + "publish_date": "2026-03-05", + "url": "https://ggzyjy.wuxi.gov.cn/doc/2026/03/05/4741071.shtml" + } + ] + }, + "model": "spark-1-mini", + "expiresAt": "2026-03-06T10:05:45.297Z", + "creditsUsed": 180 + }, + "id": "result-1772705146148-kn0ko" + }, + { + "scraperId": "scraper-1772699302521", + "city": "无锡市", + "section": "水利工程", + "subsection": "", + "type": "招标公告", + "url": "https://ggzyjy.wuxi.gov.cn/wxsggzyjyzxzl/index.shtml", + "scrapedAt": "2026-03-05T10:02:01.153Z", + "data": { + "success": true, + "status": "completed", + "data": [ + { + "标题": "高新区三级防控系统工程周三房浜闸站工程施工招标公告", + "项目金额": "5,923,797元", + "发布日期": "2026-03-05", + "详情页完整URL": "http://ggzyjy.wuxi.gov.cn/doc/2026/03/05/4741071.shtml" + } + ], + "model": "spark-1-mini", + "expiresAt": "2026-03-06T10:02:00.100Z", + "creditsUsed": 769 + }, + "id": "result-1772704921153-jx48m" + }, + { + "scraperId": "scraper-1772699302521", + "city": "无锡市", + "section": "水利工程", + "subsection": "", + "type": "招标公告", + "url": "https://ggzyjy.wuxi.gov.cn/wxsggzyjyzxzl/", + "scrapedAt": "2026-03-05T09:23:03.452Z", + "data": { + "success": true, + "status": "completed", + "data": { + "announcements": [ + { + "title": "高新区三级防控系统工程周三房浜闸站工程施工招标公告", + "amount": "5,923,797元", + "publish_date": "2026-03-05", + "url": "https://ggzyjy.wuxi.gov.cn/doc/2026/03/05/4741071.shtml" + }, + { + "title": "[WXHS202603001-X01]惠山区紧密型县域医共体服务能力提标扩能建设项目(惠山区人民医院紧密型医共体资源共享中心建设项目)勘察设计", + "amount": "570.00万元", + "publish_date": "2026-03-05", + "url": "https://ggzyjy.wuxi.gov.cn/doc/2026/03/05/4741246.shtml" + } + ] + }, + "model": "spark-1-mini", + "expiresAt": "2026-03-06T09:23:01.561Z", + "creditsUsed": 0 + }, + "id": "result-1772702583452-9t3b8" + }, + { + "scraperId": "scraper-1772699302521", + "city": "无锡市", + "section": "水利工程", + "subsection": "", + "type": "招标公告", + "url": "https://ggzyjy.wuxi.gov.cn/wxsggzyjyzxzl/", + "scrapedAt": "2026-03-05T08:39:45.736Z", + "data": { + "success": true, + "status": "completed", + "data": [ + { + "title": "[WXJY202601013-X01]江阴市长泾镇蒲市村区域性综合农事服务中心江阴市", + "amount": "874.0万元", + "date": "2026-01-30", + "url": "https://ggzyjy.wuxi.gov.cn/doc/2026/01/30/4726538.shtml" + }, + { + "title": "[WXXS202406006-X02]中共锡山区委党校异地新建项目施工总承包", + "amount": "10350.0万元", + "date": "2026-01-30", + "url": "https://ggzyjy.wuxi.gov.cn/doc/2026/01/30/4726721.shtml" + }, + { + "title": "[WXXQ202601010-X01]无锡交响音乐厅“一厅”及“两中心”品牌商户用房", + "amount": "400.0万元", + "date": "2026-01-30", + "url": "https://ggzyjy.wuxi.gov.cn/doc/2026/01/30/4726619.shtml" + }, + { + "title": "[WXXQ202601008-X01]生命园三期2号楼、3号楼改造项目工程总承包", + "amount": "3650.0万元", + "date": "2026-01-30", + "url": "https://ggzyjy.wuxi.gov.cn/doc/2026/01/30/4726675.shtml" + }, + { + "title": "[WXBH202601007-X01]军嶂山显山透绿工程-吴杨路郊野覆绿工程施工", + "amount": "440.0万元", + "date": "2026-01-30", + "url": "https://ggzyjy.wuxi.gov.cn/doc/2026/01/30/4726726.shtml" + } + ], + "model": "spark-1-mini", + "expiresAt": "2026-03-06T08:39:45.265Z", + "creditsUsed": 0 + }, + "id": "result-1772699985736-b3nca" + }, + { + "scraperId": "nj-jtsw-zbgg", + "city": "南京市", + "section": "房建市政", + "subsection": "工程类", + "type": "招标公告", + "url": "https://njggzy.nanjing.gov.cn/njweb/", + "scrapedAt": "2026-03-05T08:05:33.097Z", + "data": { + "success": true, + "status": "completed", + "data": { + "招标公告": [ + { + "标题": "【澄清公告】螺丝桥大街北延(月安街至应天大街段)道路工程 - 施工", + "项目金额": "900 万元", + "发布日期": "2026-03-05", + "详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001002/20260305/1b3da624-fe86-4755-a268-a1967cd9d489.html" + }, + { + "标题": "建邺路150-164号等9个地块城中村改造项目 - 施工", + "项目金额": "190 万元", + "发布日期": "2026-03-05", + "详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001002/20260305/6f4fcf2f-d198-4814-acd8-9817ef559a0c.html" + }, + { + "标题": "【澄清公告】南京市溧水区柘塘街道供水管网及配套设施提升改造工程 - 施工", + "项目金额": "11320.01 万元", + "发布日期": "2026-03-05", + "详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001002/20260305/11ec2263-4ed1-4115-bdd1-0a6dcbf1d6c1.html" + }, + { + "标题": "栖霞区百水芊城春水坊等5个片区排水管网改造工程 - 施工", + "项目金额": "435.86 万元", + "发布日期": "2026-03-05", + "详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001002/20260305/d69e5640-d549-4638-a64a-d1f9df58a903.html" + }, + { + "标题": "【澄清公告】兰桥八期保障性住房项目 - 新建居住区供配电工程", + "项目金额": "6000 万元", + "发布日期": "2026-03-04", + "详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001002/20260304/33e25a55-42c4-471e-9a3c-f8e792957141.html" + }, + { + "标题": "青云巷10号危房整治工程 - SG1施工", + "项目金额": "375 万元", + "发布日期": "2026-03-04", + "详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001002/20260304/e821f82c-39d8-479e-9457-b6bf5d101d80.html" + }, + { + "标题": "百水工业园地块保障房一期项目 - D地块1#楼(公安编号)室内装饰工程", + "项目金额": "600 万元", + "发布日期": "2026-03-04", + "详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001002/20260304/5f8f2183-e26f-4c03-a76a-8b4d61b0011c.html" + }, + { + "标题": "青云巷10号危房整治工程 - SG1施工", + "项目金额": "375 万元", + "发布日期": "2026-03-04", + "详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001002/20260304/9aa2d916-c0c3-4fb6-afa4-37457f0d2ceb.html" + }, + { + "标题": "【澄清公告】全国高校区域技术转移转化中心生物药物创新平台 - 施工", + "项目金额": "11000 万元", + "发布日期": "2026-03-03", + "详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001002/20260303/2d1fe57f-fe0e-42f9-a99a-c345683aed3f.html" + }, + { + "标题": "轻质耐热合金制造基地项目 - 施工", + "项目金额": "11000 万元", + "发布日期": "2026-03-03", + "详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001002/20260303/78b81308-1389-42fc-a8de-23b6b2b40be1.html" + }, + { + "标题": "【澄清公告】润埠花园二期项目 - 监理", + "项目金额": "111.37 万元", + "发布日期": "2026-03-05", + "详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001001/20260305/acb0010f-dcbc-4ea4-a988-e4dc75670999.html" + }, + { + "标题": "轻质耐热合金制造基地项目 - 监理", + "项目金额": "188 万元", + "发布日期": "2026-03-04", + "详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001001/20260304/93ee4804-5a5e-4524-92a3-b6c367803bd1.html" + }, + { + "标题": "【澄清公告】南京江北新区无人机制造共享工厂项目 - 监理", + "项目金额": "212.44 万元", + "发布日期": "2026-03-04", + "详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001001/20260304/e44a1d28-0f43-494e-8daf-2f81252ed06a.html" + }, + { + "标题": "2026年四项环卫设施大中修项目 - 设计", + "项目金额": "25.58 万元", + "发布日期": "2026-03-03", + "详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001001/20260303/225961f4-08c8-4398-99c9-7777bf0d16b7.html" + }, + { + "标题": "【澄清公告】南京市溧水区柘塘街道供水管网及配套设施提升改造工程 - 监理", + "项目金额": "164.33 万元", + "发布日期": "2026-03-03", + "详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001001/20260303/a827d48e-8e1f-42c9-bd07-09ce369c20c6.html" + }, + { + "标题": "江苏银行金融科技中心建设项目 - 勘察", + "项目金额": "170 万元", + "发布日期": "2026-03-02", + "详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001001/20260302/0ead5303-03db-4d95-b8ea-b32070a39dfa.html" + }, + { + "标题": "【澄清公告】南京高新区溧水园和凤园区改扩建项目 - 精诚电工地块及惠诚工具地块扩建厂房设计", + "项目金额": "140.68 万元", + "发布日期": "2026-03-02", + "详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001001/20260302/d8df73f9-88d0-4f5d-8831-f9857a1a4ebc.html" + }, + { + "标题": "【澄清公告】NO.新区2025G11房地产开发项目 - 全过程工程咨询服务", + "项目金额": "950 万元", + "发布日期": "2026-03-02", + "详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001001/20260302/348f6add-d17e-406d-9690-b637762175d7.html" + }, + { + "标题": "江苏省六合高级中学新建食堂体育馆项目 - 渣土运输处置", + "项目金额": "242.97917 万元", + "发布日期": "2026-02-28", + "详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001001/20260228/2099a860-b3c2-411f-8580-72cbb55fef42.html" + }, + { + "标题": "【澄清公告】药谷产业区药谷大道(华宝路-汤盘公路)建设工程 - 勘察设计", + "项目金额": "194 万元", + "发布日期": "2026-02-28", + "详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001001/20260228/ffee9562-374d-43fd-8829-bf51c5b3cb46.html" + } + ] + }, + "model": "spark-1-mini", + "expiresAt": "2026-03-06T08:05:31.995Z", + "creditsUsed": 0 + }, + "id": "result-1772697933097-7hm4v" + } +] \ No newline at end of file diff --git a/src/emailService.js b/src/emailService.js index 25e7668..4c5f29e 100644 --- a/src/emailService.js +++ b/src/emailService.js @@ -730,3 +730,166 @@ function generateReportHtml(report) { `; } + +// ========== 通用抓取结果邮件(定时任务使用) ========== + +export async function sendScraperResultsEmail(emailConfig, results) { + try { + const transporter = nodemailer.createTransport({ + host: emailConfig.smtpHost, + port: emailConfig.smtpPort || 587, + secure: emailConfig.smtpPort === 465, + auth: { + user: emailConfig.smtpUser, + pass: emailConfig.smtpPass, + }, + }); + + const htmlContent = generateScraperResultsHtml(results); + const successCount = results.filter(r => !r.error).length; + + const info = await transporter.sendMail({ + from: `"公告采集系统" <${emailConfig.smtpUser}>`, + to: emailConfig.recipients, + subject: `公告采集结果报告(${successCount}条) - ${new Date().toLocaleDateString('zh-CN')}`, + html: htmlContent, + }); + + return { success: true, messageId: info.messageId }; + } catch (error) { + console.error('发送抓取结果邮件失败:', error); + throw new Error(`邮件发送失败: ${error.message}`); + } +} + +function generateScraperResultsHtml(results) { + const successResults = results.filter(r => !r.error); + const failResults = results.filter(r => r.error); + const generatedAt = new Date().toLocaleString('zh-CN'); + + // 把所有成功来源的 items 展开,附带来源信息 + const allRows = []; + for (const r of successResults) { + const items = r.data?.result || []; + for (const item of items) { + allRows.push({ + section: [r.section, r.subsection].filter(Boolean).join(' · ') || r.city || '-', + type: r.type || '-', + title: item.title || '-', + date: item.date || '-', + amount: item.amount || '未公开', + url: item.url || '', + }); + } + } + + // 按日期降序排列 + allRows.sort((a, b) => { + if (a.date === b.date) return 0; + return a.date > b.date ? -1 : 1; + }); + + const totalItems = allRows.length; + + // 行颜色交替 + const rowHtml = allRows.length === 0 + ? `暂无数据` + : allRows.map((row, i) => ` + + ${row.section} + + ${row.type} + + ${row.title} + ${row.date} + ${row.amount} + + ${row.url + ? `查看 →` + : '-' + } + + `).join(''); + + // 失败来源列表 + const failHtml = failResults.length === 0 ? '' : ` +
+
⚠️ 抓取失败的来源(${failResults.length} 个)
+ ${failResults.map(r => ` +
+ ${r.city || ''}${r.section ? ' · ' + r.section : ''}${r.type ? ' · ' + r.type : ''} +
${r.url}
+
❌ ${r.error}
+
`).join('')} +
`; + + return ` + + + + + + 公告采集结果报告 + + +
+ + +
+

📋 公告采集结果报告

+
生成时间:${generatedAt}
+
+ + +
+
+
${totalItems}
+
公告总数
+
+
+
${successResults.length}
+
成功来源
+
+
+
${allRows.filter(r => r.amount && r.amount !== '未公开').length}
+
有金额
+
+
+
${failResults.length}
+
失败来源
+
+
+ + +
+
公告汇总(共 ${totalItems} 条)
+
+ + + + + + + + + + + + + ${rowHtml} + +
板块类型公告标题发布日期项目金额详情
+
+ + ${failHtml} + +
+ 本报告由公告采集系统自动生成 · ${generatedAt} +
+
+ +
+ + + `; +} diff --git a/src/scheduler.js b/src/scheduler.js index c9990a3..1174f41 100644 --- a/src/scheduler.js +++ b/src/scheduler.js @@ -1,503 +1,194 @@ +import 'dotenv/config'; import cron from 'node-cron'; -import { readFileSync } from 'fs'; +import { readFileSync, writeFileSync, existsSync } from 'fs'; import { fileURLToPath } from 'url'; import { dirname, join } from 'path'; -import axios from 'axios'; -import * as cheerio from 'cheerio'; -import iconv from 'iconv-lite'; -import { sendCombinedReportEmail } from './emailService.js'; +import Firecrawl from '@mendable/firecrawl-js'; +import { z } from 'zod'; +import { sendScraperResultsEmail } from './emailService.js'; const __filename = fileURLToPath(import.meta.url); const __dirname = dirname(__filename); +// 初始化 Firecrawl 客户端 +const firecrawl = new Firecrawl({ apiKey: process.env.FIRECRAWL_API_KEY }); + +const RESULTS_PATH = join(__dirname, '..', 'results.json'); + // 加载配置文件 function loadConfig() { try { const configPath = join(__dirname, '..', 'config.json'); - const configContent = readFileSync(configPath, 'utf-8'); - return JSON.parse(configContent); + return JSON.parse(readFileSync(configPath, 'utf-8')); } catch (error) { console.error('加载配置文件失败:', error.message); - console.error('请确保 config.json 文件存在并配置正确'); return null; } } -// 根据时间范围类型获取开始和结束日期 -function getDateRangeByType(timeRange) { - const now = new Date(); - const year = now.getFullYear(); - const month = String(now.getMonth() + 1).padStart(2, '0'); - const day = String(now.getDate()).padStart(2, '0'); +// ========== 结果存取(与 server.js 保持一致) ========== - let startDate, endDate; - endDate = `${year}-${month}-${day}`; // 结束日期都是今天 - - switch (timeRange) { - case 'today': - // 今日 - startDate = `${year}-${month}-${day}`; - break; - - case 'thisWeek': { - // 本周 (从周一开始) - const dayOfWeek = now.getDay(); // 0是周日,1是周一 - const diff = dayOfWeek === 0 ? 6 : dayOfWeek - 1; // 计算到周一的天数差 - const monday = new Date(now); - monday.setDate(now.getDate() - diff); - const weekYear = monday.getFullYear(); - const weekMonth = String(monday.getMonth() + 1).padStart(2, '0'); - const weekDay = String(monday.getDate()).padStart(2, '0'); - startDate = `${weekYear}-${weekMonth}-${weekDay}`; - break; - } - - case 'thisMonth': - default: - // 本月 - startDate = `${year}-${month}-01`; - break; +function readResults() { + if (!existsSync(RESULTS_PATH)) return []; + try { + return JSON.parse(readFileSync(RESULTS_PATH, 'utf-8')); + } catch (e) { + return []; } - - return { startDate, endDate }; } -// 南京市公共资源交易平台 - 交通水务中标结果公示 -const BASE_URL = 'https://njggzy.nanjing.gov.cn/njweb/jtsw/069008/'; +function saveResults(results) { + writeFileSync(RESULTS_PATH, JSON.stringify(results, null, 2), 'utf-8'); +} -// 南京市公共资源交易平台 - 交通水务招标公告 -const BID_ANNOUNCE_BASE_URL = 'https://njggzy.nanjing.gov.cn/njweb/jtsw/069001/'; +function appendResult(result) { + const results = readResults(); + results.unshift({ ...result, id: `result-${Date.now()}-${Math.random().toString(36).slice(2, 7)}` }); + if (results.length > 500) results.splice(500); + saveResults(results); +} -const http = axios.create({ - responseType: 'arraybuffer', - timeout: 15000, - headers: { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', - }, +// ========== 统一的公告抓取 Schema ========== + +// 公告抓取 Schema(result 包装数组) +const announcementSchema = z.object({ + result: z.array(z.object({ + title: z.string().describe('公告标题'), + amount: z.string().nullable().describe('项目金额(合同预估价/最高投标限价等),没有则为null'), + date: z.string().describe('发布日期,YYYY-MM-DD格式'), + url: z.string().describe('详情页完整URL,以https://开头'), + })).describe('页面上提取到的所有公告条目'), }); -function pickEncoding(contentType = '') { - const match = /charset=([^;]+)/i.exec(contentType); - if (!match) return 'utf-8'; - const charset = match[1].trim().toLowerCase(); - if (charset.includes('gb')) return 'gbk'; - return charset; -} - -async function fetchHtml(url) { - const res = await http.get(url); - const encoding = pickEncoding(res.headers['content-type']); - const html = iconv.decode(res.data, encoding || 'utf-8'); - return html; -} - -function getPageUrl(pageIndex) { - if (pageIndex === 1) { - return `${BASE_URL}moreinfosl3.html`; +/** 从 Firecrawl 返回结果中提取 result 数组 */ +function extractItems(raw) { + if (!raw) return []; + const root = (raw.data && typeof raw.data === 'object') ? raw.data : raw; + if (Array.isArray(root.result)) return root.result; + if (root.result && typeof root.result === 'object') { + const keys = Object.keys(root.result).filter(k => !isNaN(parseInt(k))); + if (keys.length > 0) return keys.sort((a, b) => parseInt(a) - parseInt(b)).map(k => root.result[k]); } - return `${BASE_URL}${pageIndex}.html`; + if (Array.isArray(root)) return root; + const numericKeys = Object.keys(root).filter(k => !isNaN(parseInt(k))); + if (numericKeys.length > 0) return numericKeys.sort((a, b) => parseInt(a) - parseInt(b)).map(k => root[k]); + return []; } -// 解析列表页HTML,提取中标结果信息 -function parseList(html) { - const $ = cheerio.load(html); - const items = []; +// ========== 抓取执行(复用 server.js 中 runScraper 的逻辑) ========== - $('li.ewb-info-item2').each((_, row) => { - const $row = $(row); - const cells = $row.find('div.ewb-info-num2'); +async function runScraper(scraper) { + console.log(`[定时任务] ${scraper.city} - ${scraper.section} ${scraper.subsection} - ${scraper.type}:${scraper.url}`); + const fullPrompt = `访问这个URL: ${scraper.url} +【目标区域】:${scraper.section || ''} - ${scraper.subsection || ''} +【公告类型】:${scraper.type || ''} - if (cells.length >= 5) { - const bidNo = $(cells[0]).find('p').attr('title') || $(cells[0]).find('p').text().trim(); - const projectName = $(cells[1]).find('p').attr('title') || $(cells[1]).find('p').text().trim(); - const bidName = $(cells[2]).find('p').attr('title') || $(cells[2]).find('p').text().trim(); - const winningPrice = $(cells[3]).find('p').text().trim(); // 中标价格 - const winningDate = $(cells[4]).find('p').text().trim(); // 中标日期 +${scraper.prompt || '提取页面上今日的招标公告信息,包括:标题、项目金额(可能为合同预估价/最高投标限价等等)、发布日期(YYYY-MM-DD格式)、详情页完整URL'} - const onclick = $row.attr('onclick') || ''; - const hrefMatch = onclick.match(/window\.open\(['"]([^'"]+)['"]\)/); - let href = ''; - if (hrefMatch) { - href = hrefMatch[1]; - if (href.startsWith('/')) { - href = `https://njggzy.nanjing.gov.cn${href}`; - } - } +请严格按照定义的 JSON 格式返回,每条公告包含 title、amount、date、url 四个字段。`; - if (!/^\d{4}-\d{2}-\d{2}$/.test(winningDate)) return; - - const price = parseFloat(winningPrice); - if (isNaN(price)) return; - - items.push({ - bidNo, - title: projectName, - bidName, - winningBid: { // 中标金额 - amount: price, - unit: '万元' - }, - date: winningDate, - href - }); - } + const result = await firecrawl.agent({ + prompt: fullPrompt, + schema: announcementSchema, + model: scraper.model || 'spark-1-mini', }); - return items; + console.log('[定时任务] 原始返回结果:', JSON.stringify(result).slice(0, 500)); + + // 标准化结果 + const rawItems = extractItems(result); + const items = rawItems.map(item => ({ + title: item.title || '', + amount: item.amount || null, + date: item.date || '', + url: item.url || '', + })); + + console.log(`[定时任务] 提取到 ${items.length} 条公告`); + + const record = { + scraperId: scraper.id, + city: scraper.city, + section: scraper.section, + subsection: scraper.subsection, + type: scraper.type, + url: scraper.url, + scrapedAt: new Date().toISOString(), + data: { result: items, total: items.length }, + }; + appendResult(record); + return record; } -function isDateInRange(dateStr, startDate, endDate) { - if (!dateStr) return false; - const date = new Date(dateStr); - if (isNaN(date.getTime())) return false; +// ========== 定时任务执行函数 ========== - if (startDate && date < new Date(startDate)) return false; - if (endDate && date > new Date(endDate)) return false; - return true; -} - -async function fetchListByDateRange(startDate, endDate, maxPages = 50) { - const allItems = []; - let shouldContinue = true; - let pageIndex = 1; - - console.log(`开始按时间范围采集: ${startDate || '不限'} 至 ${endDate || '不限'}`); - - while (shouldContinue && pageIndex <= maxPages) { - const pageUrl = getPageUrl(pageIndex); - console.log(`正在采集第 ${pageIndex} 页: ${pageUrl}`); - - try { - const html = await fetchHtml(pageUrl); - const items = parseList(html); - - if (items.length === 0) { - console.log(`第 ${pageIndex} 页没有数据,停止采集`); - break; - } - - let hasItemsInRange = false; - let allItemsBeforeRange = true; - - for (const item of items) { - if (isDateInRange(item.date, startDate, endDate)) { - allItems.push(item); - hasItemsInRange = true; - allItemsBeforeRange = false; - } else if (startDate && new Date(item.date) < new Date(startDate)) { - allItemsBeforeRange = allItemsBeforeRange && true; - } else { - allItemsBeforeRange = false; - } - } - - if (allItemsBeforeRange && startDate) { - console.log(`第 ${pageIndex} 页所有项目都早于起始日期,停止采集`); - shouldContinue = false; - } - - console.log(`第 ${pageIndex} 页找到 ${items.length} 条,符合条件 ${hasItemsInRange ? '有' : '无'}`); - - pageIndex++; - - if (shouldContinue && pageIndex <= maxPages) { - await new Promise(resolve => setTimeout(resolve, 500)); - } - } catch (err) { - console.error(`采集第 ${pageIndex} 页失败: ${err.message}`); - break; - } - } - - console.log(`总共采集了 ${pageIndex - 1} 页,找到 ${allItems.length} 条符合条件的公告`); - return allItems; -} - -// ========== 招标公告采集函数 ========== - -// 获取招标公告分页URL -function getBidAnnouncePageUrl(pageIndex) { - if (pageIndex === 1) { - return `${BID_ANNOUNCE_BASE_URL}moreinfo5dc.html`; - } - return `${BID_ANNOUNCE_BASE_URL}${pageIndex}.html`; -} - -// 解析招标公告列表页HTML -function parseBidAnnounceList(html) { - const $ = cheerio.load(html); - const items = []; - - $('li.ewb-info-item2').each((_, row) => { - const $row = $(row); - const onclick = $row.attr('onclick') || ''; - - const hrefMatch = onclick.match(/window\.open\(['"]([^'"]+)['"]\)/); - if (!hrefMatch) return; - - let href = hrefMatch[1]; - if (href.startsWith('/')) { - href = `https://njggzy.nanjing.gov.cn${href}`; - } - - const $titleP = $row.find('.ewb-info-num2').first().find('p'); - const title = $titleP.attr('title') || $titleP.text().trim(); - - const $dateP = $row.find('.ewb-info-num2').last().find('p'); - const dateText = $dateP.text().trim(); - const dateMatch = dateText.match(/\d{4}-\d{2}-\d{2}/); - const date = dateMatch ? dateMatch[0] : ''; - - if (title && date) { - items.push({ - title, - date, - href, - estimatedAmount: null - }); - } - }); - - return items; -} - -// 解析招标公告详情页,获取合同估算价 -async function fetchBidAnnounceDetail(url) { - try { - const html = await fetchHtml(url); - const $ = cheerio.load(html); - const bodyText = $('body').text(); - - const amountMatch = bodyText.match(/合同估算价[::]\s*([\d,]+\.?\d*)\s*元/); - let estimatedAmount = null; - if (amountMatch) { - const amountStr = amountMatch[1].replace(/,/g, ''); - estimatedAmount = parseFloat(amountStr); - } - - const bidCodeMatch = bodyText.match(/标段编码[::]\s*([A-Za-z0-9\-]+)/); - const bidCode = bidCodeMatch ? bidCodeMatch[1] : null; - - const tendereeMatch = bodyText.match(/招标人[为是][::]?\s*([^\s,,。]+)/); - const tenderee = tendereeMatch ? tendereeMatch[1] : null; - - const durationMatch = bodyText.match(/计划工期[::]\s*(\d+)\s*日历天/); - const duration = durationMatch ? parseInt(durationMatch[1]) : null; - - return { estimatedAmount, bidCode, tenderee, duration, url }; - } catch (error) { - console.error(`获取招标详情失败 ${url}: ${error.message}`); - return { estimatedAmount: null, url }; - } -} - -// 按时间范围采集招标公告 -async function fetchBidAnnounceByDateRange(startDate, endDate, maxPages = 20) { - const allItems = []; - let shouldContinue = true; - let pageIndex = 1; - - console.log(`开始采集招标公告: ${startDate || '不限'} 至 ${endDate || '不限'}`); - - while (shouldContinue && pageIndex <= maxPages) { - const pageUrl = getBidAnnouncePageUrl(pageIndex); - console.log(`正在采集招标公告第 ${pageIndex} 页: ${pageUrl}`); - - try { - const html = await fetchHtml(pageUrl); - const items = parseBidAnnounceList(html); - - if (items.length === 0) { - console.log(`第 ${pageIndex} 页没有数据,停止采集`); - break; - } - - let hasItemsInRange = false; - let allItemsBeforeRange = true; - - for (const item of items) { - if (isDateInRange(item.date, startDate, endDate)) { - allItems.push(item); - hasItemsInRange = true; - allItemsBeforeRange = false; - } else if (startDate && new Date(item.date) < new Date(startDate)) { - allItemsBeforeRange = allItemsBeforeRange && true; - } else { - allItemsBeforeRange = false; - } - } - - if (allItemsBeforeRange && startDate) { - console.log(`第 ${pageIndex} 页所有项目都早于起始日期,停止采集`); - shouldContinue = false; - } - - console.log(`第 ${pageIndex} 页找到 ${items.length} 条,符合条件 ${hasItemsInRange ? '有' : '无'}`); - - pageIndex++; - - if (shouldContinue && pageIndex <= maxPages) { - await new Promise(resolve => setTimeout(resolve, 500)); - } - } catch (err) { - console.error(`采集第 ${pageIndex} 页失败: ${err.message}`); - break; - } - } - - console.log(`总共采集了 ${pageIndex - 1} 页,找到 ${allItems.length} 条符合条件的招标公告`); - - // 获取详情(合同估算价) - if (allItems.length > 0) { - console.log(`开始获取 ${allItems.length} 条招标公告的详情...`); - - for (let i = 0; i < allItems.length; i++) { - const item = allItems[i]; - console.log(`获取详情 ${i + 1}/${allItems.length}: ${item.title.substring(0, 30)}...`); - - const detail = await fetchBidAnnounceDetail(item.href); - item.estimatedAmount = detail.estimatedAmount; - item.bidCode = detail.bidCode; - item.tenderee = detail.tenderee; - item.duration = detail.duration; - - if (i < allItems.length - 1) { - await new Promise(resolve => setTimeout(resolve, 300)); - } - } - - console.log('招标公告详情获取完成'); - } - - return allItems; -} - -// 定时任务执行函数 async function executeScheduledTask(config) { try { console.log('========================================'); - console.log('定时任务开始执行(综合采集)'); + console.log('定时任务开始执行'); console.log('执行时间:', new Date().toLocaleString('zh-CN')); console.log('========================================'); - const timeRange = config.scheduler.timeRange || 'thisMonth'; - const { startDate, endDate } = getDateRangeByType(timeRange); - const winningThreshold = config.scheduler.winningThreshold !== undefined ? config.scheduler.winningThreshold : 10000; // 中标阈值,默认1亿(10000万元) - const bidThreshold = config.scheduler.bidThreshold !== undefined ? config.scheduler.bidThreshold : 0; // 招标阈值,默认0(不筛选) + // 获取所有已启用的抓取来源 + const scrapers = (config.scrapers || []).filter(s => s.enabled); - const timeRangeNames = { - 'today': '今日', - 'thisWeek': '本周', - 'thisMonth': '本月' - }; - console.log(`采集时间段: ${timeRangeNames[timeRange] || '本月'}`); - console.log(`采集时间范围: ${startDate} 至 ${endDate}`); - console.log(`中标金额阈值: ${winningThreshold}万元 (${(winningThreshold / 10000).toFixed(2)}亿元)`); - console.log(`招标金额阈值: ${bidThreshold}万元 ${bidThreshold === 0 ? '(不筛选)' : `(${(bidThreshold / 10000).toFixed(2)}亿元)`}`); - - // ========== 1. 采集中标公示 ========== - console.log('\n========== 采集中标公示 =========='); - const winningItems = await fetchListByDateRange(startDate, endDate, 50); - - // 筛选大于阈值的中标项目 - const winningFiltered = winningItems.filter((item) => { - return item.winningBid && item.winningBid.amount > winningThreshold; - }); - - const winningTotal = winningFiltered.reduce( - (sum, item) => sum + (item.winningBid?.amount || 0), - 0 - ); - - console.log(`中标公示: 采集 ${winningItems.length} 条,符合阈值 ${winningFiltered.length} 条`); - - // 生成中标报告 - const winningReport = { - summary: { - total_count: winningItems.length, - filtered_count: winningFiltered.length, - threshold: `${winningThreshold}万元`, - total_amount: `${winningTotal.toFixed(2)}万元`, - generated_at: new Date().toISOString(), - date_range: { startDate, endDate }, - }, - projects: winningFiltered.map((item) => ({ - bidNo: item.bidNo, - title: item.title, - bidName: item.bidName, - date: item.date, - winningBid: item.winningBid, - url: item.href, - })), - }; - - // ========== 2. 采集招标公告 ========== - console.log('\n========== 采集招标公告 =========='); - const bidItems = await fetchBidAnnounceByDateRange(startDate, endDate, 20); - - // 筛选招标项目(根据阈值筛选,阈值为0时不筛选只要求有金额) - const bidFiltered = bidItems.filter(item => { - if (!item.estimatedAmount) return false; - if (bidThreshold === 0) return true; // 阈值为0时不筛选 - return item.estimatedAmount / 10000 > bidThreshold; // 估算价是元,阈值是万元,需要转换 - }); - - const bidTotal = bidFiltered.reduce( - (sum, item) => sum + (item.estimatedAmount || 0), - 0 - ); - - console.log(`招标公告: 采集 ${bidItems.length} 条,有金额 ${bidFiltered.length} 条`); - - // 生成招标报告 - const bidReport = { - summary: { - total_count: bidItems.length, - filtered_count: bidFiltered.length, - has_amount_count: bidFiltered.length, - threshold: bidThreshold === 0 ? '无' : `${bidThreshold}万元`, - total_amount: `${(bidTotal / 10000).toFixed(2)}万元`, - total_amount_yuan: bidTotal, - generated_at: new Date().toISOString(), - date_range: { startDate, endDate }, - report_type: '招标公告' - }, - projects: bidFiltered.map((item) => ({ - title: item.title, - bidCode: item.bidCode, - tenderee: item.tenderee, - date: item.date, - duration: item.duration, - estimatedAmount: item.estimatedAmount ? { - amount: item.estimatedAmount, - amountWan: (item.estimatedAmount / 10000).toFixed(2), - unit: '元' - } : null, - url: item.href, - })), - }; - - // ========== 3. 检查是否有数据需要发送 ========== - if (winningFiltered.length === 0 && bidFiltered.length === 0) { - console.log('\n========================================'); - console.log('暂无符合条件的项目,不发送邮件'); - console.log('========================================'); + if (scrapers.length === 0) { + console.log('没有已启用的抓取来源,跳过'); return; } - // ========== 4. 发送综合邮件 ========== - console.log('\n========================================'); - console.log('正在发送综合报告邮件...'); - const emailConfig = config.email; + console.log(`共 ${scrapers.length} 个已启用的抓取来源`); - const result = await sendCombinedReportEmail(emailConfig, winningReport, bidReport); + // 逐个运行抓取任务 + const results = []; + for (const scraper of scrapers) { + try { + console.log(`\n---------- 抓取: ${scraper.city} - ${scraper.section} ${scraper.type} ----------`); + const r = await runScraper(scraper); + results.push(r); + console.log(`✓ 抓取成功`); + } catch (err) { + console.error(`✗ 抓取失败: ${err.message}`); + const errRecord = { + scraperId: scraper.id, + city: scraper.city, + section: scraper.section, + subsection: scraper.subsection, + type: scraper.type, + url: scraper.url, + scrapedAt: new Date().toISOString(), + error: err.message, + data: null, + }; + appendResult(errRecord); + results.push(errRecord); + } + } + + const successCount = results.filter(r => !r.error).length; + const failCount = results.filter(r => r.error).length; + console.log(`\n========== 抓取完成 ==========`); + console.log(`成功: ${successCount} 条,失败: ${failCount} 条`); + + // 检查是否需要发送邮件 + if (successCount === 0) { + console.log('没有成功的抓取结果,不发送邮件'); + return; + } + + // 发送邮件报告 + if (config.email?.smtpHost && config.email?.smtpUser) { + console.log('\n正在发送抓取结果邮件...'); + try { + const emailResult = await sendScraperResultsEmail(config.email, results); + console.log('邮件发送成功! MessageId:', emailResult.messageId); + } catch (emailErr) { + console.error('邮件发送失败:', emailErr.message); + } + } else { + console.log('邮件配置不完整,跳过邮件发送'); + } - console.log('邮件发送成功!'); - console.log('收件人:', emailConfig.recipients); - console.log('MessageId:', result.messageId); - console.log(`内容: 中标公示 ${winningFiltered.length} 条,招标公告 ${bidFiltered.length} 条`); - console.log('========================================'); - console.log('定时任务执行完成'); console.log('========================================'); } catch (error) { @@ -511,96 +202,60 @@ async function executeScheduledTask(config) { // 存储当前的定时任务 let currentScheduledTask = null; -// 初始化定时任务 export function initScheduler() { const config = loadConfig(); - - if (!config) { - console.error('无法启动定时任务: 配置文件加载失败'); - return; - } - - if (!config.scheduler || !config.scheduler.enabled) { - console.log('定时任务已禁用'); - return; - } - - if (!config.email || !config.email.smtpHost || !config.email.smtpUser) { - console.error('无法启动定时任务: 邮件配置不完整'); - console.error('请在 config.json 中配置邮件信息'); - return; - } + if (!config) { console.error('无法启动定时任务: 配置文件加载失败'); return; } + if (!config.scheduler?.enabled) { console.log('定时任务已禁用'); return; } const cronTime = config.scheduler.cronTime || '0 9 * * *'; - + const enabledCount = (config.scrapers || []).filter(s => s.enabled).length; console.log('========================================'); - console.log('定时任务已启动'); - console.log('执行计划:', cronTime); - console.log('中标阈值:', config.scheduler.winningThreshold, '万元'); - console.log('招标阈值:', config.scheduler.bidThreshold, '万元', config.scheduler.bidThreshold === 0 ? '(不筛选)' : ''); - console.log('收件人:', config.email.recipients); + console.log('定时任务已启动,执行计划:', cronTime); + console.log(`已启用的抓取来源: ${enabledCount} 个`); + if (config.email?.recipients) console.log('收件人:', config.email.recipients); console.log('========================================'); - // 如果已有任务在运行,先停止 - if (currentScheduledTask) { - currentScheduledTask.stop(); - console.log('已停止旧的定时任务'); - } + if (currentScheduledTask) { currentScheduledTask.stop(); } - // 创建定时任务 currentScheduledTask = cron.schedule(cronTime, () => { - executeScheduledTask(config); - }, { - timezone: 'Asia/Shanghai' - }); + // 每次执行时重新加载配置,确保使用最新的 scrapers + const latestConfig = loadConfig(); + if (latestConfig) { + executeScheduledTask(latestConfig); + } + }, { timezone: 'Asia/Shanghai' }); } -// 重新加载配置并重启定时任务 export function reloadScheduler() { console.log('重新加载定时任务配置...'); - - // 停止当前任务 - if (currentScheduledTask) { - currentScheduledTask.stop(); - currentScheduledTask = null; - console.log('已停止当前定时任务'); - } - - // 重新初始化 + if (currentScheduledTask) { currentScheduledTask.stop(); currentScheduledTask = null; } initScheduler(); } -// 停止定时任务 export function stopScheduler() { if (currentScheduledTask) { - currentScheduledTask.stop(); - currentScheduledTask = null; - console.log('定时任务已停止'); - return true; + currentScheduledTask.stop(); currentScheduledTask = null; + console.log('定时任务已停止'); return true; } return false; } -// 获取定时任务状态 export function getSchedulerStatus() { const config = loadConfig(); + const enabledScrapers = (config?.scrapers || []).filter(s => s.enabled).length; return { isRunning: currentScheduledTask !== null, + enabledScrapers, config: config ? { enabled: config.scheduler?.enabled || false, cronTime: config.scheduler?.cronTime || '0 9 * * *', - winningThreshold: config.scheduler?.winningThreshold !== undefined ? config.scheduler.winningThreshold : 10000, - bidThreshold: config.scheduler?.bidThreshold !== undefined ? config.scheduler.bidThreshold : 0, - timeRange: config.scheduler?.timeRange || 'thisMonth', + description: config.scheduler?.description || '', } : null, }; } -// 手动执行任务(用于测试) export async function runTaskNow() { const config = loadConfig(); - if (!config) { - throw new Error('配置文件加载失败'); - } + if (!config) throw new Error('配置文件加载失败'); await executeScheduledTask(config); } diff --git a/src/server.js b/src/server.js index b2a4c87..64f64e5 100644 --- a/src/server.js +++ b/src/server.js @@ -1,12 +1,13 @@ import 'dotenv/config'; import express from 'express'; import cors from 'cors'; -import axios from 'axios'; -import * as cheerio from 'cheerio'; -import iconv from 'iconv-lite'; -import { sendReportEmail, sendBidAnnounceReportEmail, sendCombinedReportEmail } from './emailService.js'; +import Firecrawl from '@mendable/firecrawl-js'; +import { z } from 'zod'; +import { readFileSync, writeFileSync, existsSync } from 'fs'; +import { fileURLToPath } from 'url'; +import { dirname, join } from 'path'; +import { sendCombinedReportEmail } from './emailService.js'; import { initScheduler, runTaskNow, reloadScheduler, getSchedulerStatus } from './scheduler.js'; -import { log } from 'console'; const app = express(); const PORT = process.env.PORT || 5000; @@ -15,823 +16,364 @@ app.use(cors()); app.use(express.json()); app.use(express.static('public')); -// 南京市公共资源交易平台 - 交通水务中标结果公示 -const BASE_URL = 'https://njggzy.nanjing.gov.cn/njweb/jtsw/069008/'; +const firecrawl = new Firecrawl({ apiKey: process.env.FIRECRAWL_API_KEY }); -// 南京市公共资源交易平台 - 交通水务招标公告 -const BID_ANNOUNCE_BASE_URL = 'https://njggzy.nanjing.gov.cn/njweb/jtsw/069001/'; +const __filename = fileURLToPath(import.meta.url); +const __dirname = dirname(__filename); +const CONFIG_PATH = join(__dirname, '..', 'config.json'); +const RESULTS_PATH = join(__dirname, '..', 'results.json'); -// 获取分页URL (实际数据在moreinfosl3.html中,分页为2.html, 3.html...) -function getPageUrl(pageIndex) { - if (pageIndex === 1) { - return `${BASE_URL}moreinfosl3.html`; +function readConfig() { + return JSON.parse(readFileSync(CONFIG_PATH, 'utf-8')); +} + +function saveConfig(cfg) { + writeFileSync(CONFIG_PATH, JSON.stringify(cfg, null, 2), 'utf-8'); +} + +// ========== 抓取结果存取 ========== + +function readResults() { + if (!existsSync(RESULTS_PATH)) return []; + try { + return JSON.parse(readFileSync(RESULTS_PATH, 'utf-8')); + } catch (e) { + return []; } - return `${BASE_URL}${pageIndex}.html`; } -// 获取招标公告分页URL -// 数据通过AJAX加载,第1页是 moreinfo5dc.html,第2页起是 2.html, 3.html... -function getBidAnnouncePageUrl(pageIndex) { - if (pageIndex === 1) { - return `${BID_ANNOUNCE_BASE_URL}moreinfo5dc.html`; +function saveResults(results) { + writeFileSync(RESULTS_PATH, JSON.stringify(results, null, 2), 'utf-8'); +} + +function appendResult(result) { + const results = readResults(); + results.unshift({ ...result, id: `result-${Date.now()}-${Math.random().toString(36).slice(2, 7)}` }); + // 最多保留 500 条 + if (results.length > 500) results.splice(500); + saveResults(results); +} + +// 查询结果(支持分页与筛选) +app.get('/api/results', (req, res) => { + try { + const { city, type, section, page = 1, pageSize = 20, scraperId } = req.query; + let results = readResults(); + if (city) results = results.filter(r => r.city === city); + if (type) results = results.filter(r => r.type === type); + if (section) results = results.filter(r => r.section === section); + if (scraperId) results = results.filter(r => r.scraperId === scraperId); + const total = results.length; + const start = (parseInt(page) - 1) * parseInt(pageSize); + const data = results.slice(start, start + parseInt(pageSize)); + res.json({ success: true, total, page: parseInt(page), pageSize: parseInt(pageSize), data }); + } catch (e) { + res.status(500).json({ success: false, error: e.message }); } - return `${BID_ANNOUNCE_BASE_URL}${pageIndex}.html`; -} - -// 检查日期是否在范围内 -function isDateInRange(dateStr, startDate, endDate) { - if (!dateStr) return false; - const date = new Date(dateStr); - if (isNaN(date.getTime())) return false; - - if (startDate && date < new Date(startDate)) return false; - if (endDate && date > new Date(endDate)) return false; - return true; -} - -// 按时间范围采集多页列表 -async function fetchListByDateRange(startDate, endDate, maxPages = 50) { - const allItems = []; - let shouldContinue = true; - let pageIndex = 1; - - console.log(`开始按时间范围采集: ${startDate || '不限'} 至 ${endDate || '不限'}`); - - while (shouldContinue && pageIndex <= maxPages) { - const pageUrl = getPageUrl(pageIndex); - console.log(`正在采集第 ${pageIndex} 页: ${pageUrl}`); - - try { - const html = await fetchHtml(pageUrl); - const items = parseList(html); - - if (items.length === 0) { - console.log(`第 ${pageIndex} 页没有数据,停止采集`); - break; - } - - let hasItemsInRange = false; - let allItemsBeforeRange = true; - - for (const item of items) { - if (isDateInRange(item.date, startDate, endDate)) { - allItems.push(item); - hasItemsInRange = true; - allItemsBeforeRange = false; - } else if (startDate && new Date(item.date) < new Date(startDate)) { - allItemsBeforeRange = allItemsBeforeRange && true; - } else { - allItemsBeforeRange = false; - } - } - - if (allItemsBeforeRange && startDate) { - console.log(`第 ${pageIndex} 页所有项目都早于起始日期,停止采集`); - shouldContinue = false; - } - - console.log(`第 ${pageIndex} 页找到 ${items.length} 条,符合条件 ${hasItemsInRange ? '有' : '无'}`); - - pageIndex++; - - if (shouldContinue && pageIndex <= maxPages) { - await new Promise(resolve => setTimeout(resolve, 500)); - } - } catch (err) { - console.error(`采集第 ${pageIndex} 页失败: ${err.message}`); - break; - } - } - - console.log(`总共采集了 ${pageIndex - 1} 页,找到 ${allItems.length} 条符合条件的公告`); - return allItems; -} - -const http = axios.create({ - responseType: 'arraybuffer', - timeout: 15000, - headers: { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', - }, }); -function pickEncoding(contentType = '') { - const match = /charset=([^;]+)/i.exec(contentType); - if (!match) return 'utf-8'; - const charset = match[1].trim().toLowerCase(); - if (charset.includes('gb')) return 'gbk'; - return charset; -} - -async function fetchHtml(url) { - const res = await http.get(url); - const encoding = pickEncoding(res.headers['content-type']); - const html = iconv.decode(res.data, encoding || 'utf-8'); - return html; -} - -// 解析列表页HTML,提取中标结果信息 -function parseList(html) { - const $ = cheerio.load(html); - const items = []; - - // 解析南京公共资源交易平台的列表结构(交通水务中标结果公示) - //
  • - $('li.ewb-info-item2').each((_, row) => { - const $row = $(row); - const cells = $row.find('div.ewb-info-num2'); - - if (cells.length >= 5) { - // 获取各字段 - const bidNo = $(cells[0]).find('p').attr('title') || $(cells[0]).find('p').text().trim(); - const projectName = $(cells[1]).find('p').attr('title') || $(cells[1]).find('p').text().trim(); - const bidName = $(cells[2]).find('p').attr('title') || $(cells[2]).find('p').text().trim(); - const winningPrice = $(cells[3]).find('p').text().trim(); // 中标价格 - const winningDate = $(cells[4]).find('p').text().trim(); // 中标日期 - - // 从onclick提取详情链接 - const onclick = $row.attr('onclick') || ''; - const hrefMatch = onclick.match(/window\.open\(['"]([^'"]+)['"]\)/); - let href = ''; - if (hrefMatch) { - href = hrefMatch[1]; - // 转换为绝对URL - if (href.startsWith('/')) { - href = `https://njggzy.nanjing.gov.cn${href}`; - } - } - - // 验证日期格式 (YYYY-MM-DD) - if (!/^\d{4}-\d{2}-\d{2}$/.test(winningDate)) return; - - // 解析中标价格 - const price = parseFloat(winningPrice); - if (isNaN(price)) return; - - items.push({ - bidNo, // 标段编号 - title: projectName, // 项目名称 - bidName, // 标段名称 - winningBid: { // 中标金额 - amount: price, - unit: '万元' - }, - date: winningDate, // 中标日期 - href - }); - } - }); - - return items; -} - -// 解析招标公告列表页HTML -function parseBidAnnounceList(html) { - const $ = cheerio.load(html); - const items = []; - - // 解析南京公共资源交易平台的招标公告列表结构 - //
  • - $('li.ewb-info-item2').each((_, row) => { - const $row = $(row); - const onclick = $row.attr('onclick') || ''; - - // 提取详情链接 - const hrefMatch = onclick.match(/window\.open\(['"]([^'"]+)['"]\)/); - if (!hrefMatch) return; - - let href = hrefMatch[1]; - if (href.startsWith('/')) { - href = `https://njggzy.nanjing.gov.cn${href}`; - } - - // 获取标题(从p标签的title属性或文本) - const $titleP = $row.find('.ewb-info-num2').first().find('p'); - const title = $titleP.attr('title') || $titleP.text().trim(); - - // 获取日期 - const $dateP = $row.find('.ewb-info-num2').last().find('p'); - const dateText = $dateP.text().trim(); - const dateMatch = dateText.match(/\d{4}-\d{2}-\d{2}/); - const date = dateMatch ? dateMatch[0] : ''; - - if (title && date) { - items.push({ - title, - date, - href, - estimatedAmount: null - }); - } - }); - - return items; -} - -// 解析招标公告详情页,获取合同估算价 -async function fetchBidAnnounceDetail(url) { +// 删除单条结果 +app.delete('/api/results/:id', (req, res) => { try { - const html = await fetchHtml(url); - const $ = cheerio.load(html); + const results = readResults(); + const before = results.length; + const updated = results.filter(r => r.id !== req.params.id); + if (updated.length === before) return res.status(404).json({ success: false, error: '未找到' }); + saveResults(updated); + res.json({ success: true }); + } catch (e) { + res.status(500).json({ success: false, error: e.message }); + } +}); - // 获取页面全部文本 - const bodyText = $('body').text(); +// 清空所有结果 +app.delete('/api/results', (req, res) => { + try { + saveResults([]); + res.json({ success: true, message: '已清空所有结果' }); + } catch (e) { + res.status(500).json({ success: false, error: e.message }); + } +}); - // 查找合同估算价 (格式如: 合同估算价:4,300,000.00 元) - const amountMatch = bodyText.match(/合同估算价[::]\s*([\d,]+\.?\d*)\s*元/); +// 获取结果的筛选选项(城市/板块/类型下拉枚举) +app.get('/api/results/filters', (req, res) => { + try { + const results = readResults(); + const cities = [...new Set(results.map(r => r.city).filter(Boolean))]; + const sections = [...new Set(results.map(r => r.section).filter(Boolean))]; + const types = [...new Set(results.map(r => r.type).filter(Boolean))]; + res.json({ success: true, data: { cities, sections, types } }); + } catch (e) { + res.status(500).json({ success: false, error: e.message }); + } +}); - let estimatedAmount = null; - if (amountMatch) { - // 去掉逗号,转换为数字(单位:元) - const amountStr = amountMatch[1].replace(/,/g, ''); - estimatedAmount = parseFloat(amountStr); - } +// ========== 抓取来源 CRUD ========== - // 获取标段编码 - const bidCodeMatch = bodyText.match(/标段编码[::]\s*([A-Za-z0-9\-]+)/); - const bidCode = bidCodeMatch ? bidCodeMatch[1] : null; +app.get('/api/scrapers', (req, res) => { + try { + const cfg = readConfig(); + res.json({ success: true, data: cfg.scrapers || [] }); + } catch (e) { + res.status(500).json({ success: false, error: e.message }); + } +}); - // 获取招标人 - const tendereeMatch = bodyText.match(/招标人[为是][::]?\s*([^\s,,。]+)/); - const tenderee = tendereeMatch ? tendereeMatch[1] : null; - - // 获取计划工期 - const durationMatch = bodyText.match(/计划工期[::]\s*(\d+)\s*日历天/); - const duration = durationMatch ? parseInt(durationMatch[1]) : null; - - return { - estimatedAmount, - bidCode, - tenderee, - duration, - url +app.post('/api/scrapers', (req, res) => { + try { + const cfg = readConfig(); + if (!cfg.scrapers) cfg.scrapers = []; + const item = { + id: `scraper-${Date.now()}`, + city: req.body.city || '', + url: req.body.url || '', + section: req.body.section || '', + subsection: req.body.subsection || '', + type: req.body.type || '招标公告', + prompt: req.body.prompt || '', + enabled: req.body.enabled !== false, + model: req.body.model || 'spark-1-mini', }; - } catch (error) { - console.error(`获取招标详情失败 ${url}: ${error.message}`); - return { estimatedAmount: null, url }; - } -} - -// 按时间范围采集招标公告 -async function fetchBidAnnounceByDateRange(startDate, endDate, maxPages = 20, fetchDetails = true) { - const allItems = []; - let shouldContinue = true; - let pageIndex = 1; - - console.log(`开始采集招标公告: ${startDate || '不限'} 至 ${endDate || '不限'}`); - - while (shouldContinue && pageIndex <= maxPages) { - const pageUrl = getBidAnnouncePageUrl(pageIndex); - console.log(`正在采集招标公告第 ${pageIndex} 页: ${pageUrl}`); - - try { - const html = await fetchHtml(pageUrl); - const items = parseBidAnnounceList(html); - - if (items.length === 0) { - console.log(`第 ${pageIndex} 页没有数据,停止采集`); - break; - } - - let hasItemsInRange = false; - let allItemsBeforeRange = true; - - for (const item of items) { - if (isDateInRange(item.date, startDate, endDate)) { - allItems.push(item); - hasItemsInRange = true; - allItemsBeforeRange = false; - } else if (startDate && new Date(item.date) < new Date(startDate)) { - allItemsBeforeRange = allItemsBeforeRange && true; - } else { - allItemsBeforeRange = false; - } - } - - if (allItemsBeforeRange && startDate) { - console.log(`第 ${pageIndex} 页所有项目都早于起始日期,停止采集`); - shouldContinue = false; - } - - console.log(`第 ${pageIndex} 页找到 ${items.length} 条,符合条件 ${hasItemsInRange ? '有' : '无'}`); - - pageIndex++; - - if (shouldContinue && pageIndex <= maxPages) { - await new Promise(resolve => setTimeout(resolve, 500)); - } - } catch (err) { - console.error(`采集第 ${pageIndex} 页失败: ${err.message}`); - break; - } - } - - console.log(`总共采集了 ${pageIndex - 1} 页,找到 ${allItems.length} 条符合条件的招标公告`); - - // 如果需要获取详情(合同估算价) - if (fetchDetails && allItems.length > 0) { - console.log(`开始获取 ${allItems.length} 条招标公告的详情...`); - - for (let i = 0; i < allItems.length; i++) { - const item = allItems[i]; - console.log(`获取详情 ${i + 1}/${allItems.length}: ${item.title.substring(0, 30)}...`); - - const detail = await fetchBidAnnounceDetail(item.href); - item.estimatedAmount = detail.estimatedAmount; - item.bidCode = detail.bidCode; - item.tenderee = detail.tenderee; - item.duration = detail.duration; - - // 添加延迟避免请求过快 - if (i < allItems.length - 1) { - await new Promise(resolve => setTimeout(resolve, 300)); - } - } - - console.log('详情获取完成'); - } - - return allItems; -} - -// API 路由 - -// 获取列表 -app.get('/api/list', async (req, res) => { - try { - const page = parseInt(req.query.page) || 1; - const pageUrl = getPageUrl(page); - - - const html = await fetchHtml(pageUrl); - const items = parseList(html); - res.json({ success: true, data: items, page }); - } catch (error) { - res.status(500).json({ success: false, error: error.message }); + cfg.scrapers.push(item); + saveConfig(cfg); + res.json({ success: true, data: item }); + } catch (e) { + res.status(500).json({ success: false, error: e.message }); } }); -// 按时间范围获取列表 -app.post('/api/list-daterange', async (req, res) => { +app.put('/api/scrapers/:id', (req, res) => { try { - const { startDate, endDate, maxPages = 50 } = req.body; - const items = await fetchListByDateRange(startDate, endDate, maxPages); - res.json({ success: true, data: items }); - } catch (error) { - res.status(500).json({ success: false, error: error.message }); + const cfg = readConfig(); + const idx = (cfg.scrapers || []).findIndex(s => s.id === req.params.id); + if (idx === -1) return res.status(404).json({ success: false, error: '未找到该配置' }); + cfg.scrapers[idx] = { ...cfg.scrapers[idx], ...req.body, id: req.params.id }; + saveConfig(cfg); + res.json({ success: true, data: cfg.scrapers[idx] }); + } catch (e) { + res.status(500).json({ success: false, error: e.message }); } }); -// 生成报告 -app.post('/api/report', async (req, res) => { +app.delete('/api/scrapers/:id', (req, res) => { try { - const { limit = 50, threshold = 50 } = req.body; + const cfg = readConfig(); + const before = (cfg.scrapers || []).length; + cfg.scrapers = (cfg.scrapers || []).filter(s => s.id !== req.params.id); + if (cfg.scrapers.length === before) return res.status(404).json({ success: false, error: '未找到' }); + saveConfig(cfg); + res.json({ success: true }); + } catch (e) { + res.status(500).json({ success: false, error: e.message }); + } +}); - // 采集列表 - const items = []; - let pageIndex = 1; - const maxPagesToFetch = Math.ceil(limit / 10) + 1; +// ========== 统一抓取执行 ========== - while (items.length < limit && pageIndex <= maxPagesToFetch) { - const pageUrl = getPageUrl(pageIndex); - console.log(`正在采集第 ${pageIndex} 页: ${pageUrl}`); +// 公告抓取 Schema(result 包装数组) +const announcementSchema = z.object({ + result: z.array(z.object({ + title: z.string().describe('公告标题'), + amount: z.string().nullable().describe('项目金额(合同预估价/最高投标限价等),没有则为null'), + date: z.string().describe('发布日期,YYYY-MM-DD格式'), + url: z.string().describe('详情页完整URL,以https://开头'), + })).describe('页面上提取到的所有公告条目'), +}); + +/** + * 从 Firecrawl agent 返回结果中提取 result 数组 + * 优先取 root.result,再回退数字键处理 + */ +function extractItems(raw) { + if (!raw) return []; + const root = (raw.data && typeof raw.data === 'object') ? raw.data : raw; + // 最优先:result 是真正数组 + if (Array.isArray(root.result)) return root.result; + // result 是数字键对象 + if (root.result && typeof root.result === 'object') { + const keys = Object.keys(root.result).filter(k => !isNaN(parseInt(k))); + if (keys.length > 0) return keys.sort((a, b) => parseInt(a) - parseInt(b)).map(k => root.result[k]); + } + // 如果 root 本身是数组 + if (Array.isArray(root)) return root; + // 顶层数字键回退 + const numericKeys = Object.keys(root).filter(k => !isNaN(parseInt(k))); + if (numericKeys.length > 0) return numericKeys.sort((a, b) => parseInt(a) - parseInt(b)).map(k => root[k]); + return []; +} + +// 执行单个抓取来源并保存结果 +async function runScraper(scraper) { + console.log(`[Agent] ${scraper.city} - ${scraper.section} ${scraper.subsection} - ${scraper.type}:${scraper.url}`); + const fullPrompt = `访问这个URL: ${scraper.url} +【目标区域】:${scraper.section || ''} - ${scraper.subsection || ''} +【公告类型】:${scraper.type || ''} + +${scraper.prompt || '提取页面上今日的招标公告信息,包括:标题、项目金额(可能为合同预估价/最高投标限价等等)、发布日期(YYYY-MM-DD格式)、详情页完整URL'} + +请严格按照定义的 JSON 格式返回,每条公告包含 title、amount、date、url 四个字段。`; + console.log(fullPrompt, 'fullPrompt======='); + + const result = await firecrawl.agent({ + prompt: fullPrompt, + schema: announcementSchema, + model: scraper.model || 'spark-1-mini', + }); + + console.log('[Agent] 原始返回结果:', JSON.stringify(result).slice(0, 500)); + + const rawItems = extractItems(result); + const items = rawItems.map(item => ({ + title: item.title || '', + amount: item.amount || null, + date: item.date || '', + url: item.url || '', + })); + + console.log(`[Agent] 提取到 ${items.length} 条公告`); + + const record = { + scraperId: scraper.id, + city: scraper.city, + section: scraper.section, + subsection: scraper.subsection, + type: scraper.type, + url: scraper.url, + scrapedAt: new Date().toISOString(), + data: { result: items, total: items.length }, // 统一为 result 字段 + }; + appendResult(record); + return record; +} + +// 运行指定 ID 的抓取来源(单条测试) +app.post('/api/scrapers/:id/run', async (req, res) => { + try { + const cfg = readConfig(); + const scraper = (cfg.scrapers || []).find(s => s.id === req.params.id); + if (!scraper) return res.status(404).json({ success: false, error: '未找到该配置' }); + const result = await runScraper(scraper); + res.json({ success: true, data: result }); + } catch (e) { + console.error('测试抓取失败:', e.message); + res.status(500).json({ success: false, error: e.message }); + } +}); + +// 批量运行多个抓取来源 +// body: { ids: ['id1','id2',...] } 不传则运行所有已启用的 +app.post('/api/scrape/run', async (req, res) => { + try { + const cfg = readConfig(); + let scrapers = cfg.scrapers || []; + + if (req.body.ids && req.body.ids.length > 0) { + scrapers = scrapers.filter(s => req.body.ids.includes(s.id)); + } else { + scrapers = scrapers.filter(s => s.enabled); + } + + if (scrapers.length === 0) { + return res.json({ success: true, data: [], message: '没有可运行的抓取来源' }); + } + + const results = []; + for (const scraper of scrapers) { try { - const html = await fetchHtml(pageUrl); - const pageItems = parseList(html); - - if (pageItems.length === 0) { - console.log(`第 ${pageIndex} 页没有数据,停止采集`); - break; - } - - items.push(...pageItems); - pageIndex++; - - if (items.length < limit && pageIndex <= maxPagesToFetch) { - await new Promise(resolve => setTimeout(resolve, 500)); - } + const r = await runScraper(scraper); + results.push(r); } catch (err) { - console.error(`采集第 ${pageIndex} 页失败: ${err.message}`); - break; + const errRecord = { + scraperId: scraper.id, + city: scraper.city, + section: scraper.section, + subsection: scraper.subsection, + type: scraper.type, + url: scraper.url, + scrapedAt: new Date().toISOString(), + error: err.message, + data: null, + }; + appendResult(errRecord); + results.push(errRecord); } } - const results = items.slice(0, limit); - - // 按阈值筛选 - const filtered = results.filter((item) => { - return item.winningBid && item.winningBid.amount > threshold; - }); - - const total = filtered.reduce( - (sum, item) => sum + (item.winningBid?.amount || 0), - 0 - ); - - const report = { - summary: { - total_count: results.length, - filtered_count: filtered.length, - threshold: `${threshold}万元`, - total_amount: `${total.toFixed(2)}万元`, - generated_at: new Date().toISOString(), - }, - projects: filtered.map((item) => ({ - bidNo: item.bidNo, - title: item.title, - bidName: item.bidName, - date: item.date, - winningBid: item.winningBid, - url: item.href, - })), - }; - - res.json({ success: true, data: report }); - } catch (error) { - res.status(500).json({ success: false, error: error.message }); + res.json({ success: true, data: results }); + } catch (e) { + res.status(500).json({ success: false, error: e.message }); } }); -// 按时间范围生成报告 -app.post('/api/report-daterange', async (req, res) => { +// ========== 配置管理 ========== + +app.get('/api/config', (req, res) => { try { - const { startDate, endDate, threshold = 50, maxPages = 50 } = req.body; - - // 按时间范围采集列表 - const items = await fetchListByDateRange(startDate, endDate, maxPages); - - if (items.length === 0) { - return res.json({ - success: true, - data: { - summary: { - total_count: 0, - filtered_count: 0, - threshold: `${threshold}万元`, - total_amount: '0.00万元', - generated_at: new Date().toISOString(), - date_range: { startDate, endDate }, - }, - projects: [], - }, - }); - } - - // 按阈值筛选 - const filtered = items.filter((item) => { - return item.winningBid && item.winningBid.amount > threshold; - }); - - const total = filtered.reduce( - (sum, item) => sum + (item.winningBid?.amount || 0), - 0 - ); - - const report = { - summary: { - total_count: items.length, - filtered_count: filtered.length, - threshold: `${threshold}万元`, - total_amount: `${total.toFixed(2)}万元`, - generated_at: new Date().toISOString(), - date_range: { startDate, endDate }, - }, - projects: filtered.map((item) => ({ - bidNo: item.bidNo, - title: item.title, - bidName: item.bidName, - date: item.date, - winningBid: item.winningBid, - url: item.href, - })), - }; - - res.json({ success: true, data: report }); - } catch (error) { - res.status(500).json({ success: false, error: error.message }); + const cfg = readConfig(); + if (cfg.email?.smtpPass) cfg.email.smtpPass = '***已配置***'; + res.json({ success: true, data: cfg }); + } catch (e) { + res.status(500).json({ success: false, error: e.message }); } }); -// ========== 招标公告相关API ========== - -// 获取招标公告列表(简单列表,按页码) -app.get('/api/bid-announce/list', async (req, res) => { +app.post('/api/config', (req, res) => { try { - const page = parseInt(req.query.page) || 1; - const pageUrl = getBidAnnouncePageUrl(page); - - const html = await fetchHtml(pageUrl); - const items = parseBidAnnounceList(html); - res.json({ success: true, data: items, page }); - } catch (error) { - res.status(500).json({ success: false, error: error.message }); + const newCfg = req.body; + const oldCfg = readConfig(); + if (newCfg.email?.smtpPass === '***已配置***') { + newCfg.email.smtpPass = oldCfg.email?.smtpPass || ''; + } + saveConfig(newCfg); + reloadScheduler(); + res.json({ success: true, message: '配置已保存' }); + } catch (e) { + res.status(500).json({ success: false, error: e.message }); } }); -// 获取招标公告列表(按时间范围) -app.post('/api/bid-announce/list', async (req, res) => { - try { - const { startDate, endDate, maxPages = 20, fetchDetails = false } = req.body; - const items = await fetchBidAnnounceByDateRange(startDate, endDate, maxPages, fetchDetails); - res.json({ success: true, data: items }); - } catch (error) { - res.status(500).json({ success: false, error: error.message }); - } -}); +// ========== 邮件 ========== -// 生成招标公告报告(含金额统计) -app.post('/api/bid-announce/report', async (req, res) => { - try { - const { startDate, endDate, threshold = 0, maxPages = 20 } = req.body; - - // 采集招标公告(包含详情) - const items = await fetchBidAnnounceByDateRange(startDate, endDate, maxPages, true); - - if (items.length === 0) { - return res.json({ - success: true, - data: { - summary: { - total_count: 0, - filtered_count: 0, - threshold: threshold > 0 ? `${threshold}元` : '无', - total_amount: '0.00元', - generated_at: new Date().toISOString(), - date_range: { startDate, endDate }, - report_type: '招标公告' - }, - projects: [], - }, - }); - } - - // 按阈值筛选(阈值单位为元) - const filtered = threshold > 0 - ? items.filter(item => item.estimatedAmount && item.estimatedAmount >= threshold) - : items.filter(item => item.estimatedAmount); - - // 计算总金额 - const total = filtered.reduce((sum, item) => sum + (item.estimatedAmount || 0), 0); - - const report = { - summary: { - total_count: items.length, - filtered_count: filtered.length, - has_amount_count: items.filter(i => i.estimatedAmount).length, - threshold: threshold > 0 ? `${(threshold / 10000).toFixed(2)}万元` : '无', - total_amount: `${(total / 10000).toFixed(2)}万元`, - total_amount_yuan: total, - generated_at: new Date().toISOString(), - date_range: { startDate, endDate }, - report_type: '招标公告' - }, - projects: filtered.map((item) => ({ - title: item.title, - bidCode: item.bidCode, - tenderee: item.tenderee, - date: item.date, - duration: item.duration, - estimatedAmount: item.estimatedAmount ? { - amount: item.estimatedAmount, - amountWan: (item.estimatedAmount / 10000).toFixed(2), - unit: '元' - } : null, - url: item.href, - })), - }; - - res.json({ success: true, data: report }); - } catch (error) { - res.status(500).json({ success: false, error: error.message }); - } -}); - -// 发送招标公告报告邮件 -app.post('/api/bid-announce/send-email', async (req, res) => { - try { - const { emailConfig, report } = req.body; - - if (!emailConfig || !emailConfig.smtpHost || !emailConfig.smtpUser || !emailConfig.smtpPass) { - return res.status(400).json({ - success: false, - error: '邮件配置不完整,请填写SMTP服务器、用户名和密码', - }); - } - - if (!emailConfig.recipients || emailConfig.recipients.trim() === '') { - return res.status(400).json({ - success: false, - error: '请至少指定一个收件人', - }); - } - - if (!report) { - return res.status(400).json({ - success: false, - error: '没有可发送的报告数据', - }); - } - - // 使用招标公告专用的邮件发送 - const result = await sendBidAnnounceReportEmail(emailConfig, report); - - res.json({ - success: true, - message: '招标公告报告邮件发送成功', - messageId: result.messageId, - }); - } catch (error) { - console.error('发送招标公告邮件API错误:', error); - res.status(500).json({ - success: false, - error: error.message, - }); - } -}); - -// 发送综合报告邮件(中标+招标) -app.post('/api/send-combined-email', async (req, res) => { - try { - const { emailConfig, winningReport, bidReport } = req.body; - - if (!emailConfig || !emailConfig.smtpHost || !emailConfig.smtpUser || !emailConfig.smtpPass) { - return res.status(400).json({ - success: false, - error: '邮件配置不完整,请填写SMTP服务器、用户名和密码', - }); - } - - if (!emailConfig.recipients || emailConfig.recipients.trim() === '') { - return res.status(400).json({ - success: false, - error: '请至少指定一个收件人', - }); - } - - if (!winningReport && !bidReport) { - return res.status(400).json({ - success: false, - error: '没有可发送的报告数据', - }); - } - - // 发送综合邮件 - const result = await sendCombinedReportEmail(emailConfig, winningReport, bidReport); - - res.json({ - success: true, - message: '综合报告邮件发送成功', - messageId: result.messageId, - }); - } catch (error) { - console.error('发送综合邮件API错误:', error); - res.status(500).json({ - success: false, - error: error.message, - }); - } -}); - -// 发送报告邮件 app.post('/api/send-email', async (req, res) => { try { const { emailConfig, report } = req.body; + if (!emailConfig?.smtpHost || !emailConfig?.smtpUser || !emailConfig?.smtpPass) + return res.status(400).json({ success: false, error: '邮件配置不完整' }); + if (!emailConfig.recipients?.trim()) + return res.status(400).json({ success: false, error: '请指定收件人' }); + if (!report) + return res.status(400).json({ success: false, error: '没有报告数据' }); - // 验证必需的配置参数 - if (!emailConfig || !emailConfig.smtpHost || !emailConfig.smtpUser || !emailConfig.smtpPass) { - return res.status(400).json({ - success: false, - error: '邮件配置不完整,请填写SMTP服务器、用户名和密码', - }); - } - - if (!emailConfig.recipients || emailConfig.recipients.trim() === '') { - return res.status(400).json({ - success: false, - error: '请至少指定一个收件人', - }); - } - - if (!report) { - return res.status(400).json({ - success: false, - error: '没有可发送的报告数据', - }); - } - - // 发送邮件 + const { sendReportEmail } = await import('./emailService.js'); const result = await sendReportEmail(emailConfig, report); - - res.json({ - success: true, - message: '邮件发送成功', - messageId: result.messageId, - }); - } catch (error) { - console.error('发送邮件API错误:', error); - res.status(500).json({ - success: false, - error: error.message, - }); + res.json({ success: true, message: '邮件发送成功', messageId: result.messageId }); + } catch (e) { + res.status(500).json({ success: false, error: e.message }); } }); -// 获取配置 -app.get('/api/config', async (req, res) => { +// ========== 定时任务 ========== + +app.get('/api/scheduler/status', (req, res) => { try { - const { readFileSync } = await import('fs'); - const { join } = await import('path'); - const { fileURLToPath } = await import('url'); - const { dirname } = await import('path'); - - const __filename = fileURLToPath(import.meta.url); - const __dirname = dirname(__filename); - const configPath = join(__dirname, '..', 'config.json'); - - const configContent = readFileSync(configPath, 'utf-8'); - const config = JSON.parse(configContent); - - // 不返回敏感信息(密码) - if (config.email && config.email.smtpPass) { - config.email.smtpPass = '***已配置***'; - } - - res.json({ success: true, data: config }); - } catch (error) { - res.status(500).json({ success: false, error: error.message }); + res.json({ success: true, data: getSchedulerStatus() }); + } catch (e) { + res.status(500).json({ success: false, error: e.message }); } }); -// 更新配置 -app.post('/api/config', async (req, res) => { +app.post('/api/run-scheduled-task', (req, res) => { try { - const { writeFileSync, readFileSync } = await import('fs'); - const { join } = await import('path'); - const { fileURLToPath } = await import('url'); - const { dirname } = await import('path'); - - const __filename = fileURLToPath(import.meta.url); - const __dirname = dirname(__filename); - const configPath = join(__dirname, '..', 'config.json'); - - const newConfig = req.body; - - // 读取旧配置以保留敏感信息 - const oldConfigContent = readFileSync(configPath, 'utf-8'); - const oldConfig = JSON.parse(oldConfigContent); - - // 如果密码字段是占位符,保留原密码 - if (newConfig.email && newConfig.email.smtpPass === '***已配置***') { - newConfig.email.smtpPass = oldConfig.email?.smtpPass || ''; - } - - // 保存配置 - writeFileSync(configPath, JSON.stringify(newConfig, null, 2), 'utf-8'); - - // 重新加载定时任务(如果定时任务配置有变化) - reloadScheduler(); - - res.json({ success: true, message: '配置已保存并重新加载定时任务' }); - } catch (error) { - res.status(500).json({ success: false, error: error.message }); - } -}); - -// 获取定时任务状态 -app.get('/api/scheduler/status', async (req, res) => { - try { - const status = getSchedulerStatus(); - res.json({ success: true, data: status }); - } catch (error) { - res.status(500).json({ success: false, error: error.message }); - } -}); - -// 手动触发定时任务的API(用于测试) -app.post('/api/run-scheduled-task', async (req, res) => { - try { - console.log('手动触发定时任务...'); - // 在后台执行任务,不阻塞响应 - runTaskNow().catch(err => { - console.error('定时任务执行失败:', err); - }); - res.json({ - success: true, - message: '定时任务已触发,正在后台执行...' - }); - } catch (error) { - res.status(500).json({ - success: false, - error: error.message - }); + runTaskNow().catch(err => console.error('定时任务执行失败:', err)); + res.json({ success: true, message: '定时任务已在后台触发' }); + } catch (e) { + res.status(500).json({ success: false, error: e.message }); } }); app.listen(PORT, () => { console.log(`Server running at http://localhost:${PORT}`); - - // 启动定时任务 - console.log('正在初始化定时任务...'); initScheduler(); });