feat: 使用 Firecrawl 实现公告抓取与分析工具的网页界面,包括报告生成、导出和邮件发送功能。

This commit is contained in:
2026-03-06 15:37:56 +08:00
parent e3766b86be
commit ad659c4ff0
11 changed files with 3190 additions and 1490 deletions

View File

@@ -1,11 +1,15 @@
# 服务器端口配置 # 服务器端口配置
PORT=5000 PORT=5000
# Firecrawl API Key(在 https://www.firecrawl.dev/app/api-keys 获取)
FIRECRAWL_API_KEY=fc-your-api-key-here
# 环境说明: # 环境说明:
# - 开发环境:通常使用 5000 # - 开发环境:通常使用 5000
# - 生产环境:可以使用 80、8080 等 # - 生产环境:可以使用 80、8080 等
# #
# 使用方法: # 使用方法:
# 1. 复制此文件为 .env # 1. 复制此文件为 .env
# 2. 修改端口号 # 2. 填写 FIRECRAWL_API_KEY
# 3. 启动服务时会自动读取 # 3. 修改端口号(可选)
# 4. 启动服务时会自动读取

View File

@@ -5,7 +5,7 @@
"winningThreshold": 0, "winningThreshold": 0,
"bidThreshold": 0, "bidThreshold": 0,
"description": "每天9点采集当日项目", "description": "每天9点采集当日项目",
"timeRange": "thisMonth" "timeRange": "today"
}, },
"email": { "email": {
"smtpHost": "smtp.qq.com", "smtpHost": "smtp.qq.com",
@@ -13,5 +13,29 @@
"smtpUser": "1076597680@qq.com", "smtpUser": "1076597680@qq.com",
"smtpPass": "nfrjdiraqddsjeeh", "smtpPass": "nfrjdiraqddsjeeh",
"recipients": "5482498@qq.com" "recipients": "5482498@qq.com"
} },
"scrapers": [
{
"id": "scraper-1772762354799",
"city": "无锡市",
"url": "https://ggzyjy.wuxi.gov.cn/wxsggzyjyzxzl/jyxx/slgc/index.shtml",
"section": "水利工程",
"subsection": "",
"type": "招标公告",
"prompt": "提取页面上今天的招标公告信息,包括:标题、项目金额(可能为合同预估价/最高投标限价等等)、发布日期(YYYY-MM-DD格式)、详情页完整URL",
"enabled": true,
"model": "spark-1-mini"
},
{
"id": "scraper-1772762494299",
"city": "南京市",
"url": "https://njggzy.nanjing.gov.cn/njweb/fjsz/buildService1.html",
"section": "房建市政",
"subsection": "工程类",
"type": "招标公告",
"prompt": "提取页面上今日的招标公告信息,包括:标题、项目金额(可能为合同预估价/最高投标限价等等)、发布日期(YYYY-MM-DD格式)、详情页完整URL",
"enabled": false,
"model": "spark-1-mini"
}
]
} }

64
package-lock.json generated
View File

@@ -8,14 +8,38 @@
"name": "njggzy-scraper", "name": "njggzy-scraper",
"version": "2.0.0", "version": "2.0.0",
"dependencies": { "dependencies": {
"axios": "^1.6.8", "@mendable/firecrawl-js": "^4.15.2",
"cheerio": "^1.0.0-rc.12", "cheerio": "^1.0.0-rc.12",
"cors": "^2.8.5", "cors": "^2.8.5",
"dotenv": "^17.2.3", "dotenv": "^17.2.3",
"express": "^5.2.1", "express": "^5.2.1",
"iconv-lite": "^0.6.3",
"node-cron": "^4.2.1", "node-cron": "^4.2.1",
"nodemailer": "^7.0.11" "nodemailer": "^7.0.11",
"zod": "^4.3.6"
}
},
"node_modules/@mendable/firecrawl-js": {
"version": "4.15.2",
"resolved": "https://registry.npmmirror.com/@mendable/firecrawl-js/-/firecrawl-js-4.15.2.tgz",
"integrity": "sha512-J+lfnJpd00irDhy5ZJE58lsdqbc1fC1d7X6/UyF4VFASEGy1GDpR0FuVweasEpFfOhEGS5DZ+dq8Ui21zIFrOw==",
"license": "MIT",
"dependencies": {
"axios": "^1.13.5",
"typescript-event-target": "^1.1.1",
"zod": "^3.23.8",
"zod-to-json-schema": "^3.23.0"
},
"engines": {
"node": ">=22.0.0"
}
},
"node_modules/@mendable/firecrawl-js/node_modules/zod": {
"version": "3.25.76",
"resolved": "https://registry.npmmirror.com/zod/-/zod-3.25.76.tgz",
"integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==",
"license": "MIT",
"funding": {
"url": "https://github.com/sponsors/colinhacks"
} }
}, },
"node_modules/accepts": { "node_modules/accepts": {
@@ -63,13 +87,13 @@
"license": "MIT" "license": "MIT"
}, },
"node_modules/axios": { "node_modules/axios": {
"version": "1.13.2", "version": "1.13.6",
"resolved": "https://registry.npmmirror.com/axios/-/axios-1.13.2.tgz", "resolved": "https://registry.npmmirror.com/axios/-/axios-1.13.6.tgz",
"integrity": "sha512-VPk9ebNqPcy5lRGuSlKx752IlDatOjT9paPlm8A7yOuW2Fbvp4X3JznJtT4f0GzGLLiWE9W8onz51SqLYwzGaA==", "integrity": "sha512-ChTCHMouEe2kn713WHbQGcuYrr6fXTBiu460OTwWrWob16g1bXn4vtz07Ope7ewMozJAnEquLk5lWQWtBig9DQ==",
"license": "MIT", "license": "MIT",
"dependencies": { "dependencies": {
"follow-redirects": "^1.15.6", "follow-redirects": "^1.15.11",
"form-data": "^4.0.4", "form-data": "^4.0.5",
"proxy-from-env": "^1.1.0" "proxy-from-env": "^1.1.0"
} }
}, },
@@ -1331,6 +1355,12 @@
"url": "https://opencollective.com/express" "url": "https://opencollective.com/express"
} }
}, },
"node_modules/typescript-event-target": {
"version": "1.1.2",
"resolved": "https://registry.npmmirror.com/typescript-event-target/-/typescript-event-target-1.1.2.tgz",
"integrity": "sha512-TvkrTUpv7gCPlcnSoEwUVUBwsdheKm+HF5u2tPAKubkIGMfovdSizCTaZRY/NhR8+Ijy8iZZUapbVQAsNrkFrw==",
"license": "MIT"
},
"node_modules/undici": { "node_modules/undici": {
"version": "7.16.0", "version": "7.16.0",
"resolved": "https://registry.npmmirror.com/undici/-/undici-7.16.0.tgz", "resolved": "https://registry.npmmirror.com/undici/-/undici-7.16.0.tgz",
@@ -1384,6 +1414,24 @@
"resolved": "https://registry.npmmirror.com/wrappy/-/wrappy-1.0.2.tgz", "resolved": "https://registry.npmmirror.com/wrappy/-/wrappy-1.0.2.tgz",
"integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==", "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==",
"license": "ISC" "license": "ISC"
},
"node_modules/zod": {
"version": "4.3.6",
"resolved": "https://registry.npmmirror.com/zod/-/zod-4.3.6.tgz",
"integrity": "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg==",
"license": "MIT",
"funding": {
"url": "https://github.com/sponsors/colinhacks"
}
},
"node_modules/zod-to-json-schema": {
"version": "3.25.1",
"resolved": "https://registry.npmmirror.com/zod-to-json-schema/-/zod-to-json-schema-3.25.1.tgz",
"integrity": "sha512-pM/SU9d3YAggzi6MtR4h7ruuQlqKtad8e9S0fmxcMi+ueAK5Korys/aWcV9LIIHTVbj01NdzxcnXSN+O74ZIVA==",
"license": "ISC",
"peerDependencies": {
"zod": "^3.25 || ^4"
}
} }
} }
} }

View File

@@ -8,13 +8,12 @@
"start": "node src/server.js" "start": "node src/server.js"
}, },
"dependencies": { "dependencies": {
"axios": "^1.6.8", "@mendable/firecrawl-js": "latest",
"cheerio": "^1.0.0-rc.12",
"cors": "^2.8.5", "cors": "^2.8.5",
"dotenv": "^17.2.3", "dotenv": "^17.2.3",
"express": "^5.2.1", "express": "^5.2.1",
"iconv-lite": "^0.6.3",
"node-cron": "^4.2.1", "node-cron": "^4.2.1",
"nodemailer": "^7.0.11" "nodemailer": "^7.0.11",
"zod": "^3.24.2"
} }
} }

View File

@@ -687,51 +687,34 @@ function cronToFriendlyText(cronTime) {
// 加载定时任务配置 // 加载定时任务配置
async function loadSchedulerConfig() { async function loadSchedulerConfig() {
try { try {
// 从服务器获取配置
const response = await fetch(`${API_BASE}/config`); const response = await fetch(`${API_BASE}/config`);
const data = await response.json(); const data = await response.json();
if (data.success && data.data) { if (data.success && data.data) {
const config = data.data; const config = data.data;
// 填充表单
if (config.scheduler) { if (config.scheduler) {
document.getElementById('schedulerEnabled').checked = config.scheduler.enabled || false; document.getElementById('schedulerEnabled').checked = config.scheduler.enabled || false;
const cronTime = config.scheduler.cronTime || '0 9 * * *'; const cronTime = config.scheduler.cronTime || '0 9 * * *';
document.getElementById('schedulerCronInput').value = cronTime; document.getElementById('schedulerCronInput').value = cronTime;
document.getElementById('schedulerWinningThresholdInput').value = config.scheduler.winningThreshold !== undefined ? config.scheduler.winningThreshold : 10000; document.getElementById('schedulerThresholdInput').value = config.scheduler.threshold ?? 0;
document.getElementById('schedulerBidThresholdInput').value = config.scheduler.bidThreshold !== undefined ? config.scheduler.bidThreshold : 0;
document.getElementById('schedulerDescription').value = config.scheduler.description || ''; document.getElementById('schedulerDescription').value = config.scheduler.description || '';
// 时间段配置
document.getElementById('schedulerTimeRange').value = config.scheduler.timeRange || 'thisMonth';
// 反向映射Cron表达式到预设选择器 // 反向映射Cron表达式到预设选择器
const presetSelector = document.getElementById('schedulerCronPreset'); const presetSelector = document.getElementById('schedulerCronPreset');
const customGroup = document.getElementById('customCronGroup'); const customGroup = document.getElementById('customCronGroup');
// 预设值列表
const presets = [ const presets = [
'0 9 * * *', '0 9 * * *', '0 6 * * *', '0 12 * * *', '0 18 * * *',
'0 6 * * *', '0 9,18 * * *', '0 */6 * * *', '0 */12 * * *', '0 9 * * 1', '0 9 1 * *'
'0 12 * * *',
'0 18 * * *',
'0 9,18 * * *',
'0 */6 * * *',
'0 */12 * * *',
'0 9 * * 1',
'0 9 1 * *'
]; ];
// 检查是否匹配预设值
if (presets.includes(cronTime)) { if (presets.includes(cronTime)) {
presetSelector.value = cronTime; presetSelector.value = cronTime;
customGroup.style.display = 'none'; customGroup.style.display = 'none';
} else { } else {
// 自定义时间 - 尝试解析为 "分 时 * * *" 格式
presetSelector.value = 'custom'; presetSelector.value = 'custom';
customGroup.style.display = 'block'; customGroup.style.display = 'block';
const cronParts = cronTime.split(/\s+/); const cronParts = cronTime.split(/\s+/);
if (cronParts.length >= 2) { if (cronParts.length >= 2) {
document.getElementById('customMinute').value = cronParts[0]; document.getElementById('customMinute').value = cronParts[0];
@@ -740,7 +723,6 @@ async function loadSchedulerConfig() {
} }
} }
// 更新状态显示
await updateSchedulerStatus(); await updateSchedulerStatus();
} }
} catch (error) { } catch (error) {
@@ -776,7 +758,7 @@ function updateCustomCron() {
cronInput.value = `${minute} ${hour} * * *`; cronInput.value = `${minute} ${hour} * * *`;
} }
document.addEventListener('DOMContentLoaded', function() { document.addEventListener('DOMContentLoaded', function () {
// 并行加载配置,提高加载速度 // 并行加载配置,提高加载速度
Promise.all([ Promise.all([
loadEmailConfig().catch(err => console.error('加载邮件配置失败:', err)), loadEmailConfig().catch(err => console.error('加载邮件配置失败:', err)),
@@ -813,20 +795,12 @@ async function updateSchedulerStatus() {
// 更新执行计划 // 更新执行计划
if (status.config) { if (status.config) {
document.getElementById('schedulerCronTime').textContent = cronToFriendlyText(status.config.cronTime); document.getElementById('schedulerCronTime').textContent = cronToFriendlyText(status.config.cronTime);
const winningThreshold = status.config.winningThreshold; }
if (winningThreshold === 0) {
document.getElementById('schedulerWinningThreshold').textContent = '不筛选'; // 更新已启用来源数
} else { const enabledCountEl = document.getElementById('schedulerEnabledCount');
const winningBillion = (winningThreshold / 10000).toFixed(1); if (enabledCountEl) {
document.getElementById('schedulerWinningThreshold').textContent = `${winningThreshold}万元 (${winningBillion}亿)`; enabledCountEl.textContent = `${status.enabledScrapers ?? '-'}`;
}
const bidThreshold = status.config.bidThreshold;
if (bidThreshold === 0) {
document.getElementById('schedulerBidThreshold').textContent = '不筛选';
} else {
const bidBillion = (bidThreshold / 10000).toFixed(1);
document.getElementById('schedulerBidThreshold').textContent = `${bidThreshold}万元 (${bidBillion}亿)`;
}
} }
} }
} catch (error) { } catch (error) {
@@ -839,10 +813,8 @@ async function saveSchedulerConfig() {
const schedulerConfig = { const schedulerConfig = {
enabled: document.getElementById('schedulerEnabled').checked, enabled: document.getElementById('schedulerEnabled').checked,
cronTime: document.getElementById('schedulerCronInput').value, cronTime: document.getElementById('schedulerCronInput').value,
winningThreshold: parseInt(document.getElementById('schedulerWinningThresholdInput').value), threshold: parseInt(document.getElementById('schedulerThresholdInput').value) || 0,
bidThreshold: parseInt(document.getElementById('schedulerBidThresholdInput').value),
description: document.getElementById('schedulerDescription').value, description: document.getElementById('schedulerDescription').value,
timeRange: document.getElementById('schedulerTimeRange').value
}; };
// 验证Cron表达式格式(简单验证) // 验证Cron表达式格式(简单验证)
@@ -852,36 +824,16 @@ async function saveSchedulerConfig() {
return; return;
} }
// 从localStorage获取邮件配置
const emailConfigStr = localStorage.getItem('emailConfig');
let emailConfig = {};
if (emailConfigStr) {
try {
emailConfig = JSON.parse(emailConfigStr);
} catch (e) {
console.error('解析邮件配置失败:', e);
}
}
// 如果邮件配置为空,提示用户
if (!emailConfig.smtpHost || !emailConfig.smtpUser) {
if (confirm('检测到邮件配置未完成,定时任务需要邮件配置才能发送报告。\n\n是否继续保存定时任务配置(不保存邮件配置)?')) {
// 继续保存,但不包含邮件配置
} else {
return;
}
}
// 构建完整配置对象
const fullConfig = {
scheduler: schedulerConfig,
email: emailConfig
};
showSchedulerStatus('正在保存配置...', 'info'); showSchedulerStatus('正在保存配置...', 'info');
try { try {
// 先获取当前服务器配置(保留 email/scrapers 等字段)
const getResponse = await fetch(`${API_BASE}/config`);
const getData = await getResponse.json();
const currentCfg = (getData.success && getData.data) ? getData.data : {};
const fullConfig = { ...currentCfg, scheduler: schedulerConfig };
const response = await fetch(`${API_BASE}/config`, { const response = await fetch(`${API_BASE}/config`, {
method: 'POST', method: 'POST',
headers: { 'Content-Type': 'application/json' }, headers: { 'Content-Type': 'application/json' },
@@ -892,7 +844,6 @@ async function saveSchedulerConfig() {
if (data.success) { if (data.success) {
showSchedulerStatus('配置已保存,定时任务已重新加载!', 'success'); showSchedulerStatus('配置已保存,定时任务已重新加载!', 'success');
// 刷新状态显示
await updateSchedulerStatus(); await updateSchedulerStatus();
} else { } else {
showSchedulerStatus(`保存失败: ${data.error}`, 'error'); showSchedulerStatus(`保存失败: ${data.error}`, 'error');
@@ -1255,6 +1206,6 @@ async function sendCombinedReportByEmail() {
} }
// 页面加载时初始化报告日期 // 页面加载时初始化报告日期
document.addEventListener('DOMContentLoaded', function() { document.addEventListener('DOMContentLoaded', function () {
initReportDates(); initReportDates();
}); });

View File

@@ -1,5 +1,6 @@
<!DOCTYPE html> <!DOCTYPE html>
<html lang="zh-CN"> <html lang="zh-CN">
<head> <head>
<meta charset="UTF-8"> <meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0"> <meta name="viewport" content="width=device-width, initial-scale=1.0">
@@ -95,7 +96,8 @@
color: #333; color: #333;
} }
.form-group input, .form-group select { .form-group input,
.form-group select {
width: 100%; width: 100%;
padding: 12px; padding: 12px;
border: 2px solid #e0e0e0; border: 2px solid #e0e0e0;
@@ -104,7 +106,8 @@
transition: border 0.3s; transition: border 0.3s;
} }
.form-group input:focus, .form-group select:focus { .form-group input:focus,
.form-group select:focus {
outline: none; outline: none;
border-color: #667eea; border-color: #667eea;
} }
@@ -153,8 +156,13 @@
} }
@keyframes spin { @keyframes spin {
0% { transform: rotate(0deg); } 0% {
100% { transform: rotate(360deg); } transform: rotate(0deg);
}
100% {
transform: rotate(360deg);
}
} }
.results { .results {
@@ -323,8 +331,375 @@
font-size: 14px; font-size: 14px;
margin: 0 10px; margin: 0 10px;
} }
/* ===== 抓取来源配置页样式 ===== */
.scrapers-toolbar {
display: flex;
align-items: center;
justify-content: space-between;
margin-bottom: 20px;
flex-wrap: wrap;
gap: 12px;
}
.scrapers-toolbar h2 {
margin: 0;
color: #667eea;
font-size: 20px;
}
.btn-add {
background: linear-gradient(135deg, #11998e 0%, #38ef7d 100%);
color: white;
border: none;
padding: 10px 22px;
border-radius: 8px;
font-size: 15px;
cursor: pointer;
font-weight: 600;
display: flex;
align-items: center;
gap: 6px;
transition: all 0.2s;
box-shadow: 0 2px 8px rgba(17, 153, 142, 0.3);
}
.btn-add:hover {
transform: translateY(-1px);
box-shadow: 0 4px 14px rgba(17, 153, 142, 0.4);
}
.scrapers-table-wrap {
overflow-x: auto;
border-radius: 12px;
border: 1px solid #e8eaf0;
box-shadow: 0 2px 12px rgba(102, 126, 234, 0.06);
}
.scrapers-table {
width: 100%;
border-collapse: collapse;
font-size: 14px;
min-width: 800px;
}
.scrapers-table thead tr {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
}
.scrapers-table th {
padding: 14px 14px;
text-align: left;
font-weight: 600;
white-space: nowrap;
}
.scrapers-table tbody tr {
border-bottom: 1px solid #f0f0f5;
transition: background 0.15s;
}
.scrapers-table tbody tr:last-child {
border-bottom: none;
}
.scrapers-table tbody tr:hover {
background: #f5f7ff;
}
.scrapers-table td {
padding: 12px 14px;
vertical-align: top;
color: #333;
}
.scrapers-table td.prompt-cell {
max-width: 220px;
overflow: hidden;
text-overflow: ellipsis;
white-space: nowrap;
color: #666;
font-size: 13px;
}
.tag {
display: inline-block;
padding: 2px 10px;
border-radius: 20px;
font-size: 12px;
font-weight: 600;
white-space: nowrap;
}
.tag-type {
background: #e8f4fd;
color: #1a73c8;
}
.tag-enabled {
background: #e4f9ee;
color: #1a8a4a;
}
.tag-disabled {
background: #feeaea;
color: #c0392b;
}
.url-cell a {
color: #667eea;
text-decoration: none;
font-size: 12px;
word-break: break-all;
}
.url-cell a:hover {
text-decoration: underline;
}
.action-btns {
display: flex;
gap: 6px;
flex-wrap: wrap;
}
.btn-sm {
padding: 5px 12px;
border-radius: 6px;
border: none;
font-size: 12px;
font-weight: 600;
cursor: pointer;
transition: all 0.15s;
white-space: nowrap;
}
.btn-edit {
background: #fff3cd;
color: #856404;
}
.btn-edit:hover {
background: #ffc107;
color: #fff;
}
.btn-delete {
background: #fdeaea;
color: #c0392b;
}
.btn-delete:hover {
background: #e74c3c;
color: #fff;
}
.btn-run {
background: #e8f4fd;
color: #1a73c8;
}
.btn-run:hover {
background: #667eea;
color: #fff;
}
.btn-toggle-on {
background: #e4f9ee;
color: #1a8a4a;
}
.btn-toggle-on:hover {
background: #27ae60;
color: #fff;
}
.btn-toggle-off {
background: #feeaea;
color: #c0392b;
}
.btn-toggle-off:hover {
background: #e74c3c;
color: #fff;
}
.empty-state {
text-align: center;
padding: 60px 20px;
color: #aaa;
}
.empty-state svg {
margin-bottom: 12px;
opacity: 0.4;
}
/* 弹窗 */
.modal-overlay {
display: none;
position: fixed;
inset: 0;
background: rgba(0, 0, 0, 0.45);
z-index: 1000;
align-items: center;
justify-content: center;
}
.modal-overlay.show {
display: flex;
}
.modal-box {
background: white;
border-radius: 16px;
padding: 32px;
width: 600px;
max-width: 95vw;
max-height: 90vh;
overflow-y: auto;
box-shadow: 0 20px 60px rgba(0, 0, 0, 0.3);
animation: modalIn 0.2s ease;
}
@keyframes modalIn {
from {
opacity: 0;
transform: scale(0.95) translateY(-10px);
}
to {
opacity: 1;
transform: scale(1) translateY(0);
}
}
.modal-header {
display: flex;
align-items: center;
justify-content: space-between;
margin-bottom: 24px;
}
.modal-header h3 {
margin: 0;
color: #333;
font-size: 18px;
}
.modal-close {
background: none;
border: none;
font-size: 24px;
cursor: pointer;
color: #999;
line-height: 1;
padding: 0;
}
.modal-close:hover {
color: #333;
}
.modal-form .form-row {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 14px;
}
.modal-form .form-group {
margin-bottom: 16px;
}
.modal-form .form-group label {
display: block;
font-size: 13px;
font-weight: 600;
color: #555;
margin-bottom: 6px;
}
.modal-form .form-group input,
.modal-form .form-group select,
.modal-form .form-group textarea {
width: 100%;
padding: 10px 12px;
border: 1.5px solid #e0e0e0;
border-radius: 8px;
font-size: 14px;
font-family: inherit;
transition: border 0.2s;
box-sizing: border-box;
}
.modal-form .form-group input:focus,
.modal-form .form-group select:focus,
.modal-form .form-group textarea:focus {
outline: none;
border-color: #667eea;
box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.12);
}
.modal-form .form-group textarea {
resize: vertical;
min-height: 90px;
}
.modal-footer {
display: flex;
justify-content: flex-end;
gap: 10px;
margin-top: 20px;
}
.btn-cancel {
background: #f0f0f0;
color: #555;
border: none;
padding: 10px 24px;
border-radius: 8px;
font-size: 14px;
cursor: pointer;
font-weight: 600;
}
.btn-cancel:hover {
background: #e0e0e0;
}
.btn-save {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
border: none;
padding: 10px 28px;
border-radius: 8px;
font-size: 14px;
cursor: pointer;
font-weight: 600;
transition: all 0.2s;
}
.btn-save:hover {
box-shadow: 0 4px 14px rgba(102, 126, 234, 0.4);
}
.run-result {
margin-top: 16px;
padding: 14px;
background: #f7f8ff;
border-radius: 8px;
border: 1px solid #e0e5ff;
font-size: 13px;
max-height: 300px;
overflow-y: auto;
white-space: pre-wrap;
word-break: break-all;
color: #333;
}
</style> </style>
</head> </head>
<body> <body>
<div class="container"> <div class="container">
<div class="header"> <div class="header">
@@ -333,124 +708,37 @@
</div> </div>
<div class="tabs"> <div class="tabs">
<button class="tab active" onclick="switchTab('list')">中标公示</button> <button class="tab active" onclick="switchTab('scheduler')">定时任务</button>
<button class="tab" onclick="switchTab('bidAnnounce')">招标公告</button>
<button class="tab" onclick="switchTab('report')">生成报告</button>
<button class="tab" onclick="switchTab('scheduler')">定时任务</button>
<button class="tab" onclick="switchTab('email')">邮件配置</button> <button class="tab" onclick="switchTab('email')">邮件配置</button>
<button class="tab" onclick="switchTab('scrapers')">抓取来源</button>
<a href="/results.html" target="_blank" class="tab" style="text-decoration:none;color:inherit;">📊 抓取结果</a>
</div> </div>
<div class="content"> <div class="content">
<!-- 公告列表 -->
<div id="list" class="tab-content active">
<div class="form-group">
<label>页码 (第1页为最新公告)</label>
<input type="number" id="listPage" value="1" min="1" max="300">
</div>
<button class="btn" onclick="fetchList()">获取公告列表</button>
<div id="listLoading" class="loading">
<div class="spinner"></div>
<p>正在采集...</p>
</div>
<div id="listResults" class="results"></div>
<div id="listPagination" class="pagination" style="display:none;">
<button onclick="goToListPage(1)" id="listFirstPage">首页</button>
<button onclick="goToListPage(currentListPage - 1)" id="listPrevPage">上一页</button>
<span class="page-info"><span id="listCurrentPage">1</span></span>
<button onclick="goToListPage(currentListPage + 1)" id="listNextPage">下一页</button>
</div>
</div>
<!-- 招标公告 -->
<div id="bidAnnounce" class="tab-content">
<h2 style="margin-bottom: 20px; color: #e67e22;">交通水务招标公告</h2>
<p style="color: #666; margin-bottom: 20px;">浏览招标公告列表</p>
<div class="form-group">
<label>页码 (第1页为最新公告)</label>
<input type="number" id="bidListPage" value="1" min="1" max="300">
</div>
<button class="btn" onclick="fetchBidList()" style="background: linear-gradient(135deg, #e67e22 0%, #d35400 100%);">获取招标列表</button>
<div id="bidListLoading" class="loading">
<div class="spinner"></div>
<p>正在获取招标公告列表...</p>
</div>
<div id="bidListResults" class="results"></div>
<div id="bidListPagination" class="pagination" style="display:none;">
<button onclick="goToBidListPage(1)" id="bidFirstPage" style="border-color: #e67e22; color: #e67e22;">首页</button>
<button onclick="goToBidListPage(currentBidListPage - 1)" id="bidPrevPage" style="border-color: #e67e22; color: #e67e22;">上一页</button>
<span class="page-info"><span id="bidCurrentPage">1</span></span>
<button onclick="goToBidListPage(currentBidListPage + 1)" id="bidNextPage" style="border-color: #e67e22; color: #e67e22;">下一页</button>
</div>
</div>
<!-- 生成报告 -->
<div id="report" class="tab-content">
<h2 style="margin-bottom: 20px; color: #667eea;">生成综合报告</h2>
<p style="color: #666; margin-bottom: 20px;">同时采集中标公示和招标公告,生成综合报告</p>
<div class="form-group">
<label>开始日期</label>
<input type="date" id="startDate">
</div>
<div class="form-group">
<label>结束日期</label>
<input type="date" id="endDate">
</div>
<div class="form-group">
<label>最大采集页数</label>
<input type="number" id="maxPages" value="10" min="1" max="50">
</div>
<div class="form-group">
<label>中标金额阈值 (万元) - 只显示大于此金额的中标项目,0表示不筛选</label>
<input type="number" id="reportThreshold" value="10000" min="0" step="100">
</div>
<div class="form-group">
<label>招标金额阈值 (万元) - 只显示大于此金额的招标项目,0表示不筛选</label>
<input type="number" id="bidReportThreshold" value="0" min="0" step="100">
</div>
<button class="btn" onclick="generateCombinedReport()">生成综合报告</button>
<button class="btn" onclick="sendCombinedReportByEmail()" id="sendEmailBtn" style="display:none; background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);">发送邮件</button>
<div id="reportLoading" class="loading">
<div class="spinner"></div>
<p id="reportLoadingText">正在生成报告...</p>
</div>
<div id="reportResults" class="results"></div>
</div>
<!-- 定时任务 --> <!-- 定时任务 -->
<div id="scheduler" class="tab-content"> <div id="scheduler" class="tab-content active">
<h2 style="margin-bottom: 20px; color: #667eea;">定时任务配置</h2> <h2 style="margin-bottom: 20px; color: #667eea;">定时任务配置</h2>
<p style="color: #666; margin-bottom: 20px;">配置定时任务自动采集大于指定金额的项目并发送邮件报告</p> <p style="color: #666; margin-bottom: 20px;">配置定时任务自动采集大于指定金额的项目并发送邮件报告</p>
<!-- 任务状态 --> <!-- 任务状态 -->
<div id="schedulerStatus" style="margin-bottom: 30px; padding: 20px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; border-radius: 8px;"> <div id="schedulerStatus"
style="margin-bottom: 30px; padding: 20px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; border-radius: 8px;">
<h3 style="margin-top: 0; margin-bottom: 15px;">任务状态</h3> <h3 style="margin-top: 0; margin-bottom: 15px;">任务状态</h3>
<div style="display: flex; gap: 30px; flex-wrap: wrap;"> <div style="display: flex; gap: 30px; flex-wrap: wrap;">
<div> <div>
<div style="opacity: 0.9; font-size: 14px;">运行状态</div> <div style="opacity: 0.9; font-size: 14px;">运行状态</div>
<div style="font-size: 20px; font-weight: bold; margin-top: 5px;" id="schedulerRunningStatus">加载中...</div> <div style="font-size: 20px; font-weight: bold; margin-top: 5px;"
id="schedulerRunningStatus">加载中...</div>
</div> </div>
<div> <div>
<div style="opacity: 0.9; font-size: 14px;">执行时间</div> <div style="opacity: 0.9; font-size: 14px;">执行时间</div>
<div style="font-size: 20px; font-weight: bold; margin-top: 5px;" id="schedulerCronTime">-</div> <div style="font-size: 20px; font-weight: bold; margin-top: 5px;" id="schedulerCronTime">-
</div>
</div> </div>
<div> <div>
<div style="opacity: 0.9; font-size: 14px;">中标阈值</div> <div style="opacity: 0.9; font-size: 14px;">已启用来源</div>
<div style="font-size: 20px; font-weight: bold; margin-top: 5px;" id="schedulerWinningThreshold">-</div> <div style="font-size: 20px; font-weight: bold; margin-top: 5px;"
</div> id="schedulerEnabledCount">-</div>
<div>
<div style="opacity: 0.9; font-size: 14px;">招标阈值</div>
<div style="font-size: 20px; font-weight: bold; margin-top: 5px;" id="schedulerBidThreshold">-</div>
</div> </div>
</div> </div>
</div> </div>
@@ -500,52 +788,35 @@
<!-- 隐藏的Cron表达式字段 --> <!-- 隐藏的Cron表达式字段 -->
<input type="hidden" id="schedulerCronInput" value="0 9 * * *"> <input type="hidden" id="schedulerCronInput" value="0 9 * * *">
<div class="form-group">
<label>采集时间段</label>
<select id="schedulerTimeRange">
<option value="today">今日</option>
<option value="thisWeek">本周</option>
<option value="thisMonth" selected>本月</option>
</select>
<small style="color: #666; display: block; margin-top: 5px;">
今日:今天 | 本周:本周一至今 | 本月:本月1日至今
</small>
</div>
<div class="form-group"> <div class="form-group">
<label>中标金额阈值 (万元) - 只采集大于此金额的中标公示</label> <label>金额阈值(万元)- 邮件报告中只显示大于此金额的条目,0 表示不筛选</label>
<input type="number" id="schedulerWinningThresholdInput" value="100000" min="0" step="1000"> <input type="number" id="schedulerThresholdInput" value="0" min="0" step="1000">
<small style="color: #666; display: block; margin-top: 5px;"> <small style="color: #666; display: block; margin-top: 5px;">
10亿 = 100000万元 | 5亿 = 50000万元 | 1亿 = 10000万元 10亿 = 100000万元 | 1亿 = 10000万元 | 0 = 不筛选,全部显示
</small>
</div>
<div class="form-group">
<label>招标金额阈值 (万元) - 只采集大于此金额的招标公告,0表示不筛选</label>
<input type="number" id="schedulerBidThresholdInput" value="0" min="0" step="1000">
<small style="color: #666; display: block; margin-top: 5px;">
设为0时不筛选金额,只要有合同估算价的招标公告都会采集
</small> </small>
</div> </div>
<div class="form-group"> <div class="form-group">
<label>任务描述 (可选)</label> <label>任务描述 (可选)</label>
<input type="text" id="schedulerDescription" placeholder="例如: 每天9点采集大于1亿的项目"> <input type="text" id="schedulerDescription" placeholder="例如: 每天9点自动抓取所有启用来源">
</div> </div>
<button class="btn" onclick="saveSchedulerConfig()">保存配置</button> <button class="btn" onclick="saveSchedulerConfig()">保存配置</button>
<button class="btn" onclick="testSchedulerNow()" style="background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);">立即测试运行</button> <button class="btn" onclick="testSchedulerNow()"
style="background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);">立即测试运行</button>
<button class="btn" onclick="loadSchedulerConfig()" style="background: #6c757d;">刷新状态</button> <button class="btn" onclick="loadSchedulerConfig()" style="background: #6c757d;">刷新状态</button>
<div id="schedulerConfigStatus" style="margin-top: 20px;"></div> <div id="schedulerConfigStatus" style="margin-top: 20px;"></div>
<div style="margin-top: 30px; padding: 20px; background: #fff3cd; border-radius: 8px; border-left: 4px solid #ffc107;"> <div
style="margin-top: 30px; padding: 20px; background: #fff3cd; border-radius: 8px; border-left: 4px solid #ffc107;">
<h3 style="margin-top: 0; color: #856404;">使用说明</h3> <h3 style="margin-top: 0; color: #856404;">使用说明</h3>
<ul style="line-height: 1.8; color: #856404;"> <ul style="line-height: 1.8; color: #856404;">
<li><strong>数据来源:</strong> 南京公共资源交易平台 - 交通水务中标公示 + 招标公告</li> <li><strong>数据来源:</strong> 运行「抓取来源」页中所有已启用的抓取配置</li>
<li><strong>中标采集:</strong> 标段编号、项目名称、标段名称、中标价格、中标日期(按中标阈值筛选)</li> <li><strong>自动抓取:</strong> 按计划时间自动逐个运行所有启用的抓取来源,结果保存到「抓取结果」页</li>
<li><strong>招标采集:</strong> 项目名称、标段编码、招标人、合同估算价、工期(按招标阈值筛选,0表示不筛选)</li> <li><strong>邮件通知:</strong> 抓取完成后自动将结果发送到配置的邮箱(需先完成邮件配置)</li>
<li><strong>邮件发送:</strong> 自动将中标+招标综合报告生成HTML邮件并发送到配置的邮箱</li> <li><strong>提示:</strong> 请前往「抓取来源」页配置并启用需要定时抓取的来源</li>
</ul> </ul>
</div> </div>
</div> </div>
@@ -581,11 +852,13 @@
</div> </div>
<button class="btn" onclick="saveEmailConfig()">保存配置</button> <button class="btn" onclick="saveEmailConfig()">保存配置</button>
<button class="btn" onclick="testEmailConfig()" style="background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);">测试连接</button> <button class="btn" onclick="testEmailConfig()"
style="background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);">测试连接</button>
<div id="emailConfigStatus" style="margin-top: 20px;"></div> <div id="emailConfigStatus" style="margin-top: 20px;"></div>
<div style="margin-top: 30px; padding: 20px; background: #f0f8ff; border-radius: 8px; border-left: 4px solid #667eea;"> <div
style="margin-top: 30px; padding: 20px; background: #f0f8ff; border-radius: 8px; border-left: 4px solid #667eea;">
<h3 style="margin-top: 0; color: #667eea;">常用邮箱配置参考</h3> <h3 style="margin-top: 0; color: #667eea;">常用邮箱配置参考</h3>
<ul style="line-height: 1.8; color: #666;"> <ul style="line-height: 1.8; color: #666;">
<li><strong>QQ邮箱:</strong> smtp.qq.com, 端口 587 或 465, 需要使用授权码</li> <li><strong>QQ邮箱:</strong> smtp.qq.com, 端口 587 或 465, 需要使用授权码</li>
@@ -599,9 +872,345 @@
</div> </div>
</div> </div>
</div> </div>
<!-- 抓取来源配置 -->
<div id="scrapers" class="tab-content" style="padding:30px ;">
<div class="scrapers-toolbar">
<h2>抓取来源配置</h2>
<div style="display:flex;gap:10px;flex-wrap:wrap;">
<button class="btn-add" onclick="runAllScrapers()" id="btnRunAll"
style="background:linear-gradient(135deg,#667eea 0%,#764ba2 100%);">
<svg width="16" height="16" fill="none" stroke="currentColor" stroke-width="2.5"
viewBox="0 0 24 24">
<polygon points="5,3 19,12 5,21" />
</svg>
运行全部启用
</button>
<a href="/results.html" target="_blank" class="btn-add"
style="background:linear-gradient(135deg,#11998e 0%,#38ef7d 100%);text-decoration:none;">
<svg width="16" height="16" fill="none" stroke="currentColor" stroke-width="2.5"
viewBox="0 0 24 24">
<rect x="3" y="3" width="18" height="18" rx="3" />
<path d="M3 9h18M9 21V9" />
</svg>
查看结果
</a>
<button class="btn-add" onclick="openScraperModal()" style="cursor:pointer;">
<svg width="16" height="16" fill="none" stroke="currentColor" stroke-width="2.5"
viewBox="0 0 24 24">
<line x1="12" y1="5" x2="12" y2="19" />
<line x1="5" y1="12" x2="19" y2="12" />
</svg>
新增来源
</button>
</div>
</div>
<p style="color:#888;font-size:13px;margin:-8px 0 18px;">通过配置 URL 和提示词,使用 Firecrawl Agent
抓取任意网页数据。结果会自动保存,可在「抓取结果」页查看历史。</p>
<div class="scrapers-table-wrap">
<table class="scrapers-table">
<thead>
<tr>
<th style="width:80px">城市</th>
<th style="width:80px">板块</th>
<th style="width:70px">子板块</th>
<th style="width:80px">类型</th>
<th>链接地址</th>
<th>提示词</th>
<th style="width:70px">AI模型</th>
<th style="width:60px">状态</th>
<th style="width:180px">操作</th>
</tr>
</thead>
<tbody id="scrapersTbody">
<tr id="scrapers-empty-row">
<td colspan="9" class="empty-state">
<svg width="48" height="48" fill="none" stroke="currentColor" stroke-width="1.5"
viewBox="0 0 24 24">
<rect x="3" y="3" width="18" height="18" rx="3" />
<path d="M3 9h18M9 21V9" />
</svg>
<div>暂无配置,点击「新增来源」添加抓取任务</div>
</td>
</tr>
</tbody>
</table>
</div>
<!-- 批量运行状态 -->
<div id="batchRunStatus"
style="display:none;margin-top:16px;padding:14px;background:#f7f8ff;border-radius:8px;border:1px solid #e0e5ff;font-size:13px;color:#333;">
</div>
<!-- 测试结果展示 -->
<div id="scraperRunResult" style="display:none;">
<div style="margin-top:20px;font-size:14px;font-weight:600;color:#333;margin-bottom:8px;">📋 测试抓取结果
</div>
<div class="run-result" id="scraperRunResultContent"></div>
</div>
</div>
</div>
</div>
<!-- 新增/编辑弹窗 -->
<div class="modal-overlay" id="scraperModal">
<div class="modal-box">
<div class="modal-header">
<h3 id="scraperModalTitle">新增抓取来源</h3>
<button class="modal-close" onclick="closeScraperModal()">×</button>
</div>
<form class="modal-form" onsubmit="saveScraperItem(event)">
<input type="hidden" id="scraperEditId">
<div class="form-row">
<div class="form-group">
<label>城市 *</label>
<input type="text" id="scraperCity" placeholder="例: 南京市" required>
</div>
<div class="form-group">
<label>板块</label>
<input type="text" id="scraperSection" placeholder="例: 交通水务">
</div>
</div>
<div class="form-row">
<div class="form-group">
<label>子板块</label>
<input type="text" id="scraperSubsection" placeholder="例: 建设工程">
</div>
<div class="form-group">
<label>类型</label>
<input type="text" id="scraperType" placeholder="例: 招标公告">
</div>
</div>
<div class="form-group">
<label>链接地址 *</label>
<input type="url" id="scraperUrl" placeholder="https://..." required>
</div>
<div class="form-group">
<label>提示词(Agent 指令)*</label>
<textarea id="scraperPrompt"
placeholder="提取页面上今日的招标公告信息,包括:标题、项目金额(可能为合同预估价/最高投标限价等等)、发布日期(YYYY-MM-DD格式)、详情页完整URL"
required></textarea>
</div>
<div class="form-row">
<div class="form-group">
<label>AI 模型</label>
<select id="scraperModel">
<option value="spark-1-mini">spark-1-mini(默认)</option>
<option value="spark-2">spark-2</option>
<option value="gpt-4o-mini">gpt-4o-mini</option>
<option value="claude-3-haiku">claude-3-haiku</option>
</select>
</div>
<div class="form-group" style="display:flex;align-items:flex-end;padding-bottom:2px;">
<div class="checkbox-wrapper" style="width:100%;"
onclick="document.getElementById('scraperEnabled').click();">
<input type="checkbox" id="scraperEnabled" checked onclick="event.stopPropagation();">
<label for="scraperEnabled">启用此来源</label>
</div>
</div>
</div>
<div class="modal-footer">
<button type="button" class="btn-cancel" onclick="closeScraperModal()">取消</button>
<button type="submit" class="btn-save">保存</button>
</div>
</form>
</div>
</div> </div>
<!-- docx库已改为按需加载,只在用户点击导出时才加载,提升首屏加载速度 --> <!-- docx库已改为按需加载,只在用户点击导出时才加载,提升首屏加载速度 -->
<script src="app.js" defer></script> <script src="app.js" defer></script>
<script>
// ===== 抓取来源配置 JS =====
let scrapersList = [];
/**
 * Fetch the configured scraper sources from the backend, store them in the
 * page-level `scrapersList`, and redraw the table.
 * Any failure along the way is logged to the console instead of being thrown.
 */
async function loadScrapers() {
  const reportFailure = (reason) => {
    console.error('加载抓取来源失败:', reason);
  };
  try {
    const payload = await fetch('/api/scrapers').then((response) => response.json());
    // A response without a `data` field is treated as an empty list.
    scrapersList = payload.data || [];
    renderScrapers();
  } catch (reason) {
    reportFailure(reason);
  }
}
/**
 * Redraw the scraper-source table body from the page-level `scrapersList`.
 *
 * Every configured value is HTML-escaped before it is written through
 * `innerHTML`, and ids are handed to the inline click handlers as escaped
 * JSON string literals, so markup or quotes stored in a source's fields
 * cannot break out of the cell or attribute they are rendered into.
 * An empty list renders a single placeholder row.
 */
function renderScrapers() {
  // Escape a value for use in HTML text and in double-quoted attributes.
  const esc = (value) => String(value ?? '')
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
  // Encode a value as a JS string literal that is safe inside an inline
  // handler attribute (the browser HTML-decodes it back to a quoted string).
  const jsArg = (value) => esc(JSON.stringify(String(value ?? '')));

  const tbody = document.getElementById('scrapersTbody');
  if (scrapersList.length === 0) {
    tbody.innerHTML = `<tr id="scrapers-empty-row"><td colspan="9" class="empty-state">
<svg width="48" height="48" fill="none" stroke="currentColor" stroke-width="1.5" viewBox="0 0 24 24"><rect x="3" y="3" width="18" height="18" rx="3"/><path d="M3 9h18M9 21V9"/></svg>
<div>暂无配置,点击「新增来源」添加抓取任务</div></td></tr>`;
    return;
  }

  tbody.innerHTML = scrapersList.map((s) => {
    const url = String(s.url ?? '');
    // Show the URL without its scheme and add an ellipsis only when the
    // 35-character cut actually removed something.
    const bareUrl = url.replace(/^https?:\/\//, '');
    const shownUrl = bareUrl.length > 35 ? `${bareUrl.substring(0, 35)}...` : bareUrl;
    // Only http(s) targets become clickable; other schemes get an inert href.
    const href = /^https?:\/\//i.test(url) ? esc(url) : '#';
    const idArg = jsArg(s.id);
    return `
<tr>
<td>${esc(s.city || '-')}</td>
<td>${esc(s.section || '-')}</td>
<td>${esc(s.subsection || '-')}</td>
<td><span class="tag tag-type">${esc(s.type || '')}</span></td>
<td class="url-cell"><a href="${href}" target="_blank" title="${esc(url)}">${esc(shownUrl)}</a></td>
<td class="prompt-cell" title="${esc(s.prompt || '')}">${esc(s.prompt || '-')}</td>
<td style="font-size:12px;color:#888;">${esc(s.model || 'spark-1-mini')}</td>
<td><span class="tag ${s.enabled ? 'tag-enabled' : 'tag-disabled'}">${s.enabled ? '启用' : '禁用'}</span></td>
<td>
<div class="action-btns">
<button class="btn-sm btn-edit" onclick="openScraperModal(${idArg})" title="编辑">编辑</button>
<button class="btn-sm btn-run" onclick="runScraper(${idArg})" title="测试运行">测试</button>
<button class="btn-sm ${s.enabled ? 'btn-toggle-on' : 'btn-toggle-off'}" onclick="toggleScraper(${idArg}, ${!s.enabled})" title="切换启用状态">${s.enabled ? '禁用' : '启用'}</button>
<button class="btn-sm btn-delete" onclick="deleteScraper(${idArg})" title="删除">删除</button>
</div>
</td>
</tr>
`;
  }).join('');
}
/**
 * Open the add/edit dialog for a scraper source.
 *
 * With an `id` that matches an entry in `scrapersList` the form is filled
 * from that entry (edit mode); otherwise the form is reset to the defaults
 * for a new source. Fields missing on a stored entry are shown as empty
 * strings rather than the literal text "undefined".
 *
 * @param {string} [id] - Identifier of the source to edit; omit to add one.
 */
function openScraperModal(id) {
  // Default agent instruction offered for new sources (same wording as the
  // prompts stored in config.json, with balanced parentheses).
  const defaultPrompt = '提取页面上今日的招标公告信息,包括:标题、项目金额(可能为合同预估价/最高投标限价等等)、发布日期(YYYY-MM-DD格式)、详情页完整URL';
  const item = id ? scrapersList.find((s) => s.id === id) : null;
  const setValue = (elementId, value) => {
    document.getElementById(elementId).value = value ?? '';
  };

  document.getElementById('scraperModalTitle').textContent = item ? '编辑抓取来源' : '新增抓取来源';
  setValue('scraperEditId', item ? item.id : '');
  setValue('scraperCity', item ? item.city : '');
  setValue('scraperSection', item ? item.section : '');
  setValue('scraperSubsection', item ? item.subsection : '');
  setValue('scraperType', item ? item.type : '招标公告');
  setValue('scraperUrl', item ? item.url : '');
  setValue('scraperPrompt', item ? item.prompt : defaultPrompt);
  setValue('scraperModel', item ? (item.model || 'spark-1-mini') : 'spark-1-mini');
  document.getElementById('scraperEnabled').checked = item ? Boolean(item.enabled) : true;
  // Hide any test-run output left over from a previous action.
  document.getElementById('scraperRunResult').style.display = 'none';
  document.getElementById('scraperModal').classList.add('show');
}
/** Hide the add/edit dialog by removing its `show` class. */
function closeScraperModal() {
  const modal = document.getElementById('scraperModal');
  modal.classList.remove('show');
}
/**
 * Submit handler of the add/edit form: create a new scraper source (POST)
 * or update the one being edited (PUT), then close the dialog and reload
 * the table. Failures are reported through `alert`.
 *
 * @param {Event} e - The form's submit event; its default action is cancelled.
 */
async function saveScraperItem(e) {
  e.preventDefault();
  const field = (elementId) => document.getElementById(elementId);
  const editingId = field('scraperEditId').value;
  const payload = {
    city: field('scraperCity').value.trim(),
    url: field('scraperUrl').value.trim(),
    section: field('scraperSection').value.trim(),
    subsection: field('scraperSubsection').value.trim(),
    type: field('scraperType').value,
    prompt: field('scraperPrompt').value.trim(),
    model: field('scraperModel').value,
    enabled: field('scraperEnabled').checked,
  };
  try {
    // An id in the hidden field means an existing source is being edited.
    const endpoint = editingId ? `/api/scrapers/${editingId}` : '/api/scrapers';
    const request = {
      method: editingId ? 'PUT' : 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(payload),
    };
    const reply = await fetch(endpoint, request).then((response) => response.json());
    if (!reply.success) {
      throw new Error(reply.error);
    }
    closeScraperModal();
    await loadScrapers();
  } catch (failure) {
    alert(`保存失败: ${failure.message}`);
  }
}
/**
 * Ask for confirmation, then delete one scraper source on the server and
 * reload the table. A rejected request or an unsuccessful reply is shown
 * in an alert.
 *
 * @param {string} id - Identifier of the source to delete.
 */
async function deleteScraper(id) {
  const target = scrapersList.find((entry) => entry.id === id);
  const confirmed = confirm(`确定要删除「${target?.city} - ${target?.type}」吗?`);
  if (!confirmed) {
    return;
  }
  try {
    const reply = await fetch(`/api/scrapers/${id}`, { method: 'DELETE' })
      .then((response) => response.json());
    if (!reply.success) {
      throw new Error(reply.error);
    }
    await loadScrapers();
  } catch (failure) {
    alert(`删除失败: ${failure.message}`);
  }
}
/**
 * Switch a scraper source on or off on the server, then reload the table.
 * A rejected request or an unsuccessful reply is shown in an alert.
 *
 * @param {string} id - Identifier of the source to update.
 * @param {boolean} enabled - The new enabled state to store.
 */
async function toggleScraper(id, enabled) {
  const request = {
    method: 'PUT',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ enabled }),
  };
  try {
    const reply = await fetch(`/api/scrapers/${id}`, request)
      .then((response) => response.json());
    if (!reply.success) {
      throw new Error(reply.error);
    }
    await loadScrapers();
  } catch (failure) {
    alert(`操作失败: ${failure.message}`);
  }
}
/**
 * Run a single scraper source as a test and show the outcome in the
 * result panel below the table: a progress note first, then either the
 * returned data as pretty-printed JSON or the failure message.
 *
 * @param {string} id - Identifier of the source to run.
 */
async function runScraper(id) {
  const source = scrapersList.find((entry) => entry.id === id);
  const panel = document.getElementById('scraperRunResult');
  const output = document.getElementById('scraperRunResultContent');
  panel.style.display = 'block';
  output.textContent = `正在测试抓取「${source?.city} - ${source?.type}」,请稍候...`;

  let outcome;
  try {
    const reply = await fetch(`/api/scrapers/${id}/run`, { method: 'POST' })
      .then((response) => response.json());
    if (!reply.success) {
      throw new Error(reply.error);
    }
    outcome = JSON.stringify(reply.data, null, 2);
  } catch (failure) {
    outcome = `❌ 测试失败: ${failure.message}`;
  }
  output.textContent = outcome;
}
// Load the scraper sources automatically when their tab is opened.
const _origSwitchTab = typeof switchTab === 'function' ? switchTab : null;
document.addEventListener('DOMContentLoaded', () => {
  // Hook the click on the scraper-sources tab button so entering that tab
  // refreshes the table.
  for (const tabButton of document.querySelectorAll('.tab')) {
    if (tabButton.textContent.trim() !== '抓取来源') {
      continue;
    }
    tabButton.addEventListener('click', () => {
      loadScrapers();
    });
  }
  // A click on the overlay itself (not on the dialog inside it) closes the modal.
  const overlay = document.getElementById('scraperModal');
  overlay.addEventListener('click', (event) => {
    if (event.target === overlay) {
      closeScraperModal();
    }
  });
});
/**
 * Run every enabled scraper source in one batch request.
 *
 * Alerts and returns when no source in `scrapersList` is enabled, asks for
 * confirmation, then posts to `/api/scrape/run` and shows a success/failure
 * summary in the batch status box. While the request is in flight the
 * "run all" button is disabled; it is always restored afterwards.
 * A failure message is written as plain text, never as HTML, because it
 * may carry text that originates from the server or the scraped site.
 */
async function runAllScrapers() {
  const enabled = scrapersList.filter((s) => s.enabled);
  if (enabled.length === 0) {
    alert('没有已启用的抓取来源,请先在列表中启用至少一个来源。');
    return;
  }
  if (!confirm(`确定要运行全部 ${enabled.length} 个已启用的抓取来源吗?\n结果将自动保存,可在「抓取结果」页查看。`)) return;

  const btn = document.getElementById('btnRunAll');
  const statusDiv = document.getElementById('batchRunStatus');
  btn.disabled = true;
  btn.textContent = '运行中...';
  statusDiv.style.display = 'block';
  statusDiv.innerHTML = `⏳ 正在运行 ${enabled.length} 个抓取来源,请稍候...`;
  try {
    const res = await fetch('/api/scrape/run', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({}),
    });
    const json = await res.json();
    if (!json.success) throw new Error(json.error);
    // Tolerate a reply without a result list instead of failing on .filter.
    const results = Array.isArray(json.data) ? json.data : [];
    const failed = results.filter((r) => r.error).length;
    const ok = results.length - failed;
    statusDiv.innerHTML = `
✅ 批量抓取完成!成功 <strong>${ok}</strong> 条,失败 <strong>${failed}</strong> 条。
&nbsp;&nbsp;<a href="/results.html" target="_blank" style="color:#667eea;font-weight:600;text-decoration:underline;">点击查看抓取结果 →</a>
`;
  } catch (e) {
    // textContent keeps any markup inside the message inert.
    statusDiv.textContent = `❌ 批量运行失败: ${e.message}`;
  } finally {
    btn.disabled = false;
    // Restore the button's original icon and label.
    btn.innerHTML = `<svg width="16" height="16" fill="none" stroke="currentColor" stroke-width="2.5" viewBox="0 0 24 24"><polygon points="5,3 19,12 5,21"/></svg> 运行全部启用`;
  }
}
</script>
</body> </body>
</html> </html>

1273
public/results.html Normal file

File diff suppressed because it is too large Load Diff

432
results.json Normal file
View File

@@ -0,0 +1,432 @@
[
{
"scraperId": "scraper-1772762354799",
"city": "无锡市",
"section": "水利工程",
"subsection": "",
"type": "招标公告",
"url": "https://ggzyjy.wuxi.gov.cn/wxsggzyjyzxzl/jyxx/slgc/index.shtml",
"scrapedAt": "2026-03-06T06:57:46.881Z",
"data": {
"result": [
{
"title": "高新区三级防控系统工程周三房浜闸站工程施工招标公告",
"amount": "5,923,797元",
"date": "2026-03-05",
"url": "http://ggzyjy.wuxi.gov.cn/doc/2026/03/05/4741071.shtml"
}
],
"total": 1
},
"id": "result-1772780266881-odaof"
},
{
"scraperId": "scraper-1772762354799",
"city": "无锡市",
"section": "水利工程",
"subsection": "",
"type": "招标公告",
"url": "https://ggzyjy.wuxi.gov.cn/wxsggzyjyzxzl/jyxx/slgc/index.shtml",
"scrapedAt": "2026-03-06T06:42:40.619Z",
"data": {
"result": [
{
"title": "高新区三级防控系统工程周三房浜闸站工程施工招标公告",
"amount": "5923797元",
"date": "2026-03-05",
"url": "https://ggzyjy.wuxi.gov.cn/doc/2026/03/05/4741071.shtml"
}
],
"total": 1
},
"id": "result-1772779360620-xr7ue"
},
{
"scraperId": "scraper-1772762354799",
"city": "无锡市",
"section": "水利工程",
"subsection": "",
"type": "招标公告",
"url": "https://ggzyjy.wuxi.gov.cn/wxsggzyjyzxzl/jyxx/slgc/index.shtml",
"scrapedAt": "2026-03-06T04:02:43.530Z",
"data": {
"items": [
{
"title": "高新区三级防控系统工程周三房浜闸站工程施工招标公告",
"amount": "5923797元",
"date": "2026-03-05",
"url": "https://ggzyjy.wuxi.gov.cn/doc/2026/03/05/4741071.shtml"
}
],
"total": 1
},
"id": "result-1772769763530-3axw2"
},
{
"scraperId": "scraper-1772762354799",
"city": "无锡市",
"section": "水利工程",
"subsection": "",
"type": "招标公告",
"url": "https://ggzyjy.wuxi.gov.cn/wxsggzyjyzxzl/jyxx/slgc/index.shtml",
"scrapedAt": "2026-03-06T02:51:39.452Z",
"error": "Insufficient credits to perform this request. For more credits, you can upgrade your plan at https://firecrawl.dev/pricing or try changing the request limit to a lower value.",
"data": null,
"id": "result-1772765499452-ynhn0"
},
{
"scraperId": "scraper-1772762494299",
"city": "南京市",
"section": "房建市政",
"subsection": "工程类",
"type": "招标公告",
"url": "https://njggzy.nanjing.gov.cn/njweb/fjsz/buildService1.html",
"scrapedAt": "2026-03-06T02:32:03.818Z",
"data": {
"success": true,
"status": "completed",
"data": {
"target_date": "2026-03-06",
"notice_count": 0,
"notices": [],
"message": "截至当前时间2026-03-06 02:19网站尚未发布今日2026-03-06的招标公告。最新公告日期为2026-03-05。",
"recent_notices_fallback": [
{
"title": "麒麟科创园具身智能训练场装修项目",
"date": "2026-03-05",
"url": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001002/20260305/a20ee94f-b76e-4f88-b8df-2847c2f35ce1.html",
"amount": "5660000.00"
},
{
"title": "站东13号MCd080-07-08地块10kV电力杆线迁改工程",
"date": "2026-03-05",
"url": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001002/20260305/f0b99840-e8de-4a08-b2ba-3e57a347864c.html",
"amount": "9543100.00"
},
{
"title": "【澄清公告】螺丝桥大街北延(月安街至应天大街段)道路工程",
"date": "2026-03-05",
"url": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001002/20260305/1b3da624-fe86-4755-a268-a1967cd9d489.html",
"amount": "900万元"
},
{
"title": "建邺路150-164号等9个地块城中村改造项目",
"date": "2026-03-05",
"url": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001002/20260305/6f4fcf2f-d198-4814-acd8-9817ef559a0c.html",
"amount": "1,900,000.00"
},
{
"title": "【澄清公告】南京市溧水区柘塘街道供水管网及配套设施提升改造工程",
"date": "2026-03-05",
"url": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001002/20260305/11ec2263-4ed1-4115-bdd1-0a6dcbf1d6c1.html",
"amount": "11320.01万元"
}
]
},
"model": "spark-1-mini",
"expiresAt": "2026-03-07T02:32:00.316Z",
"creditsUsed": 0
},
"id": "result-1772764323818-mj8km"
},
{
"scraperId": "scraper-1772762354799",
"city": "无锡市",
"section": "水利工程",
"subsection": "",
"type": "招标公告",
"url": "https://ggzyjy.wuxi.gov.cn/wxsggzyjyzxzl/jyxx/slgc/zbgg/index.shtml",
"scrapedAt": "2026-03-06T02:19:27.580Z",
"data": {
"success": true,
"status": "completed",
"data": [
{
"title": "高新区三级防控系统工程周三房浜闸站工程施工招标公告",
"project_amount": "5,923,797元 (最高投标限价)",
"publish_date": "2026-03-05",
"detail_url": "http://ggzyjy.wuxi.gov.cn/doc/2026/03/05/4741071.shtml"
}
],
"model": "spark-1-mini",
"expiresAt": "2026-03-07T02:19:24.631Z",
"creditsUsed": 0
},
"id": "result-1772763567581-ahz62"
},
{
"scraperId": "scraper-1772699302521",
"city": "无锡市",
"section": "",
"subsection": "",
"type": "招标公告",
"url": "https://ggzyjy.wuxi.gov.cn/wxsggzyjyzxzl/jyxx/slgc/index.shtml",
"scrapedAt": "2026-03-05T10:05:46.148Z",
"data": {
"success": true,
"status": "completed",
"data": {
"announcements": [
{
"title": "高新区三级防控系统工程周三房浜闸站工程施工招标公告",
"project_amount": "最高投标限价5923797元",
"publish_date": "2026-03-05",
"url": "https://ggzyjy.wuxi.gov.cn/doc/2026/03/05/4741071.shtml"
}
]
},
"model": "spark-1-mini",
"expiresAt": "2026-03-06T10:05:45.297Z",
"creditsUsed": 180
},
"id": "result-1772705146148-kn0ko"
},
{
"scraperId": "scraper-1772699302521",
"city": "无锡市",
"section": "水利工程",
"subsection": "",
"type": "招标公告",
"url": "https://ggzyjy.wuxi.gov.cn/wxsggzyjyzxzl/index.shtml",
"scrapedAt": "2026-03-05T10:02:01.153Z",
"data": {
"success": true,
"status": "completed",
"data": [
{
"标题": "高新区三级防控系统工程周三房浜闸站工程施工招标公告",
"项目金额": "5,923,797元",
"发布日期": "2026-03-05",
"详情页完整URL": "http://ggzyjy.wuxi.gov.cn/doc/2026/03/05/4741071.shtml"
}
],
"model": "spark-1-mini",
"expiresAt": "2026-03-06T10:02:00.100Z",
"creditsUsed": 769
},
"id": "result-1772704921153-jx48m"
},
{
"scraperId": "scraper-1772699302521",
"city": "无锡市",
"section": "水利工程",
"subsection": "",
"type": "招标公告",
"url": "https://ggzyjy.wuxi.gov.cn/wxsggzyjyzxzl/",
"scrapedAt": "2026-03-05T09:23:03.452Z",
"data": {
"success": true,
"status": "completed",
"data": {
"announcements": [
{
"title": "高新区三级防控系统工程周三房浜闸站工程施工招标公告",
"amount": "5,923,797元",
"publish_date": "2026-03-05",
"url": "https://ggzyjy.wuxi.gov.cn/doc/2026/03/05/4741071.shtml"
},
{
"title": "[WXHS202603001-X01]惠山区紧密型县域医共体服务能力提标扩能建设项目(惠山区人民医院紧密型医共体资源共享中心建设项目)勘察设计",
"amount": "570.00万元",
"publish_date": "2026-03-05",
"url": "https://ggzyjy.wuxi.gov.cn/doc/2026/03/05/4741246.shtml"
}
]
},
"model": "spark-1-mini",
"expiresAt": "2026-03-06T09:23:01.561Z",
"creditsUsed": 0
},
"id": "result-1772702583452-9t3b8"
},
{
"scraperId": "scraper-1772699302521",
"city": "无锡市",
"section": "水利工程",
"subsection": "",
"type": "招标公告",
"url": "https://ggzyjy.wuxi.gov.cn/wxsggzyjyzxzl/",
"scrapedAt": "2026-03-05T08:39:45.736Z",
"data": {
"success": true,
"status": "completed",
"data": [
{
"title": "[WXJY202601013-X01]江阴市长泾镇蒲市村区域性综合农事服务中心江阴市",
"amount": "874.0万元",
"date": "2026-01-30",
"url": "https://ggzyjy.wuxi.gov.cn/doc/2026/01/30/4726538.shtml"
},
{
"title": "[WXXS202406006-X02]中共锡山区委党校异地新建项目施工总承包",
"amount": "10350.0万元",
"date": "2026-01-30",
"url": "https://ggzyjy.wuxi.gov.cn/doc/2026/01/30/4726721.shtml"
},
{
"title": "[WXXQ202601010-X01]无锡交响音乐厅“一厅”及“两中心”品牌商户用房",
"amount": "400.0万元",
"date": "2026-01-30",
"url": "https://ggzyjy.wuxi.gov.cn/doc/2026/01/30/4726619.shtml"
},
{
"title": "[WXXQ202601008-X01]生命园三期2号楼、3号楼改造项目工程总承包",
"amount": "3650.0万元",
"date": "2026-01-30",
"url": "https://ggzyjy.wuxi.gov.cn/doc/2026/01/30/4726675.shtml"
},
{
"title": "[WXBH202601007-X01]军嶂山显山透绿工程-吴杨路郊野覆绿工程施工",
"amount": "440.0万元",
"date": "2026-01-30",
"url": "https://ggzyjy.wuxi.gov.cn/doc/2026/01/30/4726726.shtml"
}
],
"model": "spark-1-mini",
"expiresAt": "2026-03-06T08:39:45.265Z",
"creditsUsed": 0
},
"id": "result-1772699985736-b3nca"
},
{
"scraperId": "nj-jtsw-zbgg",
"city": "南京市",
"section": "房建市政",
"subsection": "工程类",
"type": "招标公告",
"url": "https://njggzy.nanjing.gov.cn/njweb/",
"scrapedAt": "2026-03-05T08:05:33.097Z",
"data": {
"success": true,
"status": "completed",
"data": {
"招标公告": [
{
"标题": "【澄清公告】螺丝桥大街北延(月安街至应天大街段)道路工程 - 施工",
"项目金额": "900 万元",
"发布日期": "2026-03-05",
"详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001002/20260305/1b3da624-fe86-4755-a268-a1967cd9d489.html"
},
{
"标题": "建邺路150-164号等9个地块城中村改造项目 - 施工",
"项目金额": "190 万元",
"发布日期": "2026-03-05",
"详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001002/20260305/6f4fcf2f-d198-4814-acd8-9817ef559a0c.html"
},
{
"标题": "【澄清公告】南京市溧水区柘塘街道供水管网及配套设施提升改造工程 - 施工",
"项目金额": "11320.01 万元",
"发布日期": "2026-03-05",
"详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001002/20260305/11ec2263-4ed1-4115-bdd1-0a6dcbf1d6c1.html"
},
{
"标题": "栖霞区百水芊城春水坊等5个片区排水管网改造工程 - 施工",
"项目金额": "435.86 万元",
"发布日期": "2026-03-05",
"详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001002/20260305/d69e5640-d549-4638-a64a-d1f9df58a903.html"
},
{
"标题": "【澄清公告】兰桥八期保障性住房项目 - 新建居住区供配电工程",
"项目金额": "6000 万元",
"发布日期": "2026-03-04",
"详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001002/20260304/33e25a55-42c4-471e-9a3c-f8e792957141.html"
},
{
"标题": "青云巷10号危房整治工程 - SG1施工",
"项目金额": "375 万元",
"发布日期": "2026-03-04",
"详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001002/20260304/e821f82c-39d8-479e-9457-b6bf5d101d80.html"
},
{
"标题": "百水工业园地块保障房一期项目 - D地块1#楼(公安编号)室内装饰工程",
"项目金额": "600 万元",
"发布日期": "2026-03-04",
"详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001002/20260304/5f8f2183-e26f-4c03-a76a-8b4d61b0011c.html"
},
{
"标题": "青云巷10号危房整治工程 - SG1施工",
"项目金额": "375 万元",
"发布日期": "2026-03-04",
"详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001002/20260304/9aa2d916-c0c3-4fb6-afa4-37457f0d2ceb.html"
},
{
"标题": "【澄清公告】全国高校区域技术转移转化中心生物药物创新平台 - 施工",
"项目金额": "11000 万元",
"发布日期": "2026-03-03",
"详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001002/20260303/2d1fe57f-fe0e-42f9-a99a-c345683aed3f.html"
},
{
"标题": "轻质耐热合金制造基地项目 - 施工",
"项目金额": "11000 万元",
"发布日期": "2026-03-03",
"详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001002/20260303/78b81308-1389-42fc-a8de-23b6b2b40be1.html"
},
{
"标题": "【澄清公告】润埠花园二期项目 - 监理",
"项目金额": "111.37 万元",
"发布日期": "2026-03-05",
"详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001001/20260305/acb0010f-dcbc-4ea4-a988-e4dc75670999.html"
},
{
"标题": "轻质耐热合金制造基地项目 - 监理",
"项目金额": "188 万元",
"发布日期": "2026-03-04",
"详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001001/20260304/93ee4804-5a5e-4524-92a3-b6c367803bd1.html"
},
{
"标题": "【澄清公告】南京江北新区无人机制造共享工厂项目 - 监理",
"项目金额": "212.44 万元",
"发布日期": "2026-03-04",
"详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001001/20260304/e44a1d28-0f43-494e-8daf-2f81252ed06a.html"
},
{
"标题": "2026年四项环卫设施大中修项目 - 设计",
"项目金额": "25.58 万元",
"发布日期": "2026-03-03",
"详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001001/20260303/225961f4-08c8-4398-99c9-7777bf0d16b7.html"
},
{
"标题": "【澄清公告】南京市溧水区柘塘街道供水管网及配套设施提升改造工程 - 监理",
"项目金额": "164.33 万元",
"发布日期": "2026-03-03",
"详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001001/20260303/a827d48e-8e1f-42c9-bd07-09ce369c20c6.html"
},
{
"标题": "江苏银行金融科技中心建设项目 - 勘察",
"项目金额": "170 万元",
"发布日期": "2026-03-02",
"详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001001/20260302/0ead5303-03db-4d95-b8ea-b32070a39dfa.html"
},
{
"标题": "【澄清公告】南京高新区溧水园和凤园区改扩建项目 - 精诚电工地块及惠诚工具地块扩建厂房设计",
"项目金额": "140.68 万元",
"发布日期": "2026-03-02",
"详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001001/20260302/d8df73f9-88d0-4f5d-8831-f9857a1a4ebc.html"
},
{
"标题": "【澄清公告】NO.新区2025G11房地产开发项目 - 全过程工程咨询服务",
"项目金额": "950 万元",
"发布日期": "2026-03-02",
"详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001001/20260302/348f6add-d17e-406d-9690-b637762175d7.html"
},
{
"标题": "江苏省六合高级中学新建食堂体育馆项目 - 渣土运输处置",
"项目金额": "242.97917 万元",
"发布日期": "2026-02-28",
"详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001001/20260228/2099a860-b3c2-411f-8580-72cbb55fef42.html"
},
{
"标题": "【澄清公告】药谷产业区药谷大道(华宝路-汤盘公路)建设工程 - 勘察设计",
"项目金额": "194 万元",
"发布日期": "2026-02-28",
"详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001001/20260228/ffee9562-374d-43fd-8829-bf51c5b3cb46.html"
}
]
},
"model": "spark-1-mini",
"expiresAt": "2026-03-06T08:05:31.995Z",
"creditsUsed": 0
},
"id": "result-1772697933097-7hm4v"
}
]

View File

@@ -730,3 +730,166 @@ function generateReportHtml(report) {
</html> </html>
`; `;
} }
// ========== 通用抓取结果邮件(定时任务使用) ==========
/**
 * Send the scrape results as an HTML report email (used by the scheduled job).
 *
 * The SMTP port is normalised to a number before it is used, so a port stored
 * as the string "465" still switches the transport to implicit TLS
 * (`secure: true`); a missing or invalid port falls back to 587.
 *
 * @param {object} emailConfig - SMTP settings: `smtpHost`, `smtpPort`,
 *   `smtpUser`, `smtpPass`, and `recipients`.
 * @param {Array<object>} results - One entry per scraped source; entries with
 *   an `error` property count as failed.
 * @returns {Promise<{success: boolean, messageId: string}>} Resolves once the
 *   mail has been handed to the SMTP server.
 * @throws {Error} When building or sending the mail fails; the original error
 *   is attached as `cause`.
 */
export async function sendScraperResultsEmail(emailConfig, results) {
  try {
    const port = Number(emailConfig.smtpPort) || 587;
    const transporter = nodemailer.createTransport({
      host: emailConfig.smtpHost,
      port,
      // Port 465 expects TLS from the first byte; other ports start in plain text.
      secure: port === 465,
      auth: {
        user: emailConfig.smtpUser,
        pass: emailConfig.smtpPass,
      },
    });
    const htmlContent = generateScraperResultsHtml(results);
    // The subject reports how many sources were scraped without an error.
    const successCount = results.filter((r) => !r.error).length;
    const info = await transporter.sendMail({
      from: `"公告采集系统" <${emailConfig.smtpUser}>`,
      to: emailConfig.recipients,
      subject: `公告采集结果报告(${successCount}条) - ${new Date().toLocaleDateString('zh-CN')}`,
      html: htmlContent,
    });
    return { success: true, messageId: info.messageId };
  } catch (error) {
    console.error('发送抓取结果邮件失败:', error);
    throw new Error(`邮件发送失败: ${error.message}`, { cause: error });
  }
}
/**
 * Build the HTML body of the scrape-results report email.
 *
 * Successful sources contribute one table row per entry of `data.result`
 * (rows are ordered by date, newest first); failed sources are listed in a
 * separate block with their error text. All scraped or configured text is
 * HTML-escaped before it is placed into the document, because it comes from
 * third-party web pages and error messages.
 *
 * @param {Array<object>} results - One entry per source. Entries with an
 *   `error` property are treated as failed; others are expected to carry
 *   `data.result`, an array of `{ title, date, amount, url }` items.
 * @returns {string} A complete HTML document.
 */
function generateScraperResultsHtml(results) {
  // Escape a value for use in HTML text and in double-quoted attributes.
  const esc = (value) => String(value ?? '')
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');

  const successResults = results.filter((r) => !r.error);
  const failResults = results.filter((r) => r.error);
  const generatedAt = new Date().toLocaleString('zh-CN');

  // Flatten the items of every successful source into rows that also carry
  // the source's section/type labels.
  const allRows = [];
  for (const r of successResults) {
    // Skip sources whose payload is not the expected array of items.
    const items = Array.isArray(r.data?.result) ? r.data.result : [];
    for (const item of items) {
      if (!item || typeof item !== 'object') continue;
      allRows.push({
        section: [r.section, r.subsection].filter(Boolean).join(' · ') || r.city || '-',
        type: r.type || '-',
        title: item.title || '-',
        date: item.date || '-',
        amount: item.amount || '未公开',
        url: item.url || '',
      });
    }
  }

  // Newest first; dates are compared as strings.
  allRows.sort((a, b) => {
    if (a.date === b.date) return 0;
    return a.date > b.date ? -1 : 1;
  });
  const totalItems = allRows.length;

  // Table rows with alternating background colours.
  const rowHtml = allRows.length === 0
    ? `<tr><td colspan="6" style="text-align:center;color:#999;padding:30px;font-size:14px;">暂无数据</td></tr>`
    : allRows.map((row, i) => `
<tr style="background:${i % 2 === 0 ? '#fff' : '#f7f8ff'};">
<td style="padding:9px 12px;border-bottom:1px solid #eaecf5;white-space:nowrap;color:#555;font-size:13px;">${esc(row.section)}</td>
<td style="padding:9px 12px;border-bottom:1px solid #eaecf5;white-space:nowrap;">
<span style="display:inline-block;padding:2px 8px;background:#e8f4fd;color:#1a73c8;border-radius:10px;font-size:11px;font-weight:600;">${esc(row.type)}</span>
</td>
<td style="padding:9px 12px;border-bottom:1px solid #eaecf5;font-size:13px;max-width:320px;">${esc(row.title)}</td>
<td style="padding:9px 12px;border-bottom:1px solid #eaecf5;white-space:nowrap;font-size:13px;color:#555;">${esc(row.date)}</td>
<td style="padding:9px 12px;border-bottom:1px solid #eaecf5;white-space:nowrap;font-size:13px;font-weight:600;color:${row.amount === '未公开' ? '#aaa' : '#e67e22'};">${esc(row.amount)}</td>
<td style="padding:9px 12px;border-bottom:1px solid #eaecf5;text-align:center;">
${row.url
    ? `<a href="${esc(row.url)}" target="_blank" style="color:#667eea;font-size:12px;text-decoration:none;white-space:nowrap;">查看 →</a>`
    : '<span style="color:#ccc;font-size:12px;">-</span>'
}
</td>
</tr>`).join('');

  // Block listing the sources that failed, omitted when there are none.
  const failHtml = failResults.length === 0 ? '' : `
<div style="margin-top:24px;">
<div style="font-size:14px;font-weight:600;color:#c0392b;margin-bottom:10px;">⚠️ 抓取失败的来源(${failResults.length} 个)</div>
${failResults.map((r) => `
<div style="background:#fdeaea;border-left:3px solid #e74c3c;padding:10px 14px;border-radius:4px;margin-bottom:8px;font-size:13px;">
<strong>${esc(`${r.city || ''}${r.section ? ' · ' + r.section : ''}${r.type ? ' · ' + r.type : ''}`)}</strong>
<div style="color:#999;font-size:12px;margin-top:4px;">${esc(r.url)}</div>
<div style="color:#c0392b;margin-top:4px;">❌ ${esc(r.error)}</div>
</div>`).join('')}
</div>`;

  return `
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>公告采集结果报告</title>
</head>
<body style="font-family:'PingFang SC','Microsoft YaHei',Arial,sans-serif;line-height:1.6;color:#333;margin:0;padding:20px;background:#f0f2f8;">
<div style="max-width:960px;margin:0 auto;background:white;border-radius:10px;overflow:hidden;box-shadow:0 4px 20px rgba(0,0,0,.1);">
<!-- 标题栏 -->
<div style="background:linear-gradient(135deg,#667eea 0%,#764ba2 100%);padding:24px 30px;color:white;">
<h1 style="margin:0;font-size:20px;font-weight:700;">📋 公告采集结果报告</h1>
<div style="margin-top:6px;opacity:.85;font-size:13px;">生成时间:${generatedAt}</div>
</div>
<!-- 统计栏 -->
<div style="display:flex;gap:0;border-bottom:1px solid #eaecf5;">
<div style="flex:1;padding:16px 24px;text-align:center;border-right:1px solid #eaecf5;">
<div style="font-size:28px;font-weight:700;color:#667eea;">${totalItems}</div>
<div style="font-size:12px;color:#888;margin-top:2px;">公告总数</div>
</div>
<div style="flex:1;padding:16px 24px;text-align:center;border-right:1px solid #eaecf5;">
<div style="font-size:28px;font-weight:700;color:#1a8a4a;">${successResults.length}</div>
<div style="font-size:12px;color:#888;margin-top:2px;">成功来源</div>
</div>
<div style="flex:1;padding:16px 24px;text-align:center;border-right:1px solid #eaecf5;">
<div style="font-size:28px;font-weight:700;color:#e67e22;">${allRows.filter((r) => r.amount && r.amount !== '未公开').length}</div>
<div style="font-size:12px;color:#888;margin-top:2px;">有金额</div>
</div>
<div style="flex:1;padding:16px 24px;text-align:center;">
<div style="font-size:28px;font-weight:700;color:${failResults.length > 0 ? '#c0392b' : '#aaa'};">${failResults.length}</div>
<div style="font-size:12px;color:#888;margin-top:2px;">失败来源</div>
</div>
</div>
<!-- 公告汇总表格 -->
<div style="padding:24px 30px;">
<div style="font-size:15px;font-weight:600;color:#333;margin-bottom:14px;">公告汇总(共 ${totalItems} 条)</div>
<div style="overflow-x:auto;">
<table style="width:100%;border-collapse:collapse;font-size:13px;">
<thead>
<tr style="background:linear-gradient(135deg,#667eea 0%,#764ba2 100%);color:white;">
<th style="padding:10px 12px;text-align:left;font-weight:600;white-space:nowrap;">板块</th>
<th style="padding:10px 12px;text-align:left;font-weight:600;white-space:nowrap;">类型</th>
<th style="padding:10px 12px;text-align:left;font-weight:600;">公告标题</th>
<th style="padding:10px 12px;text-align:left;font-weight:600;white-space:nowrap;">发布日期</th>
<th style="padding:10px 12px;text-align:left;font-weight:600;white-space:nowrap;">项目金额</th>
<th style="padding:10px 12px;text-align:center;font-weight:600;white-space:nowrap;">详情</th>
</tr>
</thead>
<tbody>
${rowHtml}
</tbody>
</table>
</div>
${failHtml}
<div style="margin-top:24px;padding-top:16px;border-top:1px solid #eaecf5;color:#aaa;font-size:12px;text-align:center;">
本报告由公告采集系统自动生成 · ${generatedAt}
</div>
</div>
</div>
</body>
</html>
`;
}

View File

@@ -1,503 +1,194 @@
import 'dotenv/config';
import cron from 'node-cron'; import cron from 'node-cron';
import { readFileSync } from 'fs'; import { readFileSync, writeFileSync, existsSync } from 'fs';
import { fileURLToPath } from 'url'; import { fileURLToPath } from 'url';
import { dirname, join } from 'path'; import { dirname, join } from 'path';
import axios from 'axios'; import Firecrawl from '@mendable/firecrawl-js';
import * as cheerio from 'cheerio'; import { z } from 'zod';
import iconv from 'iconv-lite'; import { sendScraperResultsEmail } from './emailService.js';
import { sendCombinedReportEmail } from './emailService.js';
const __filename = fileURLToPath(import.meta.url); const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename); const __dirname = dirname(__filename);
// 初始化 Firecrawl 客户端
const firecrawl = new Firecrawl({ apiKey: process.env.FIRECRAWL_API_KEY });
const RESULTS_PATH = join(__dirname, '..', 'results.json');
// 加载配置文件 // 加载配置文件
function loadConfig() { function loadConfig() {
try { try {
const configPath = join(__dirname, '..', 'config.json'); const configPath = join(__dirname, '..', 'config.json');
const configContent = readFileSync(configPath, 'utf-8'); return JSON.parse(readFileSync(configPath, 'utf-8'));
return JSON.parse(configContent);
} catch (error) { } catch (error) {
console.error('加载配置文件失败:', error.message); console.error('加载配置文件失败:', error.message);
console.error('请确保 config.json 文件存在并配置正确');
return null; return null;
} }
} }
// 根据时间范围类型获取开始和结束日期 // ========== 结果存取(与 server.js 保持一致) ==========
function getDateRangeByType(timeRange) {
const now = new Date();
const year = now.getFullYear();
const month = String(now.getMonth() + 1).padStart(2, '0');
const day = String(now.getDate()).padStart(2, '0');
let startDate, endDate; function readResults() {
endDate = `${year}-${month}-${day}`; // 结束日期都是今天 if (!existsSync(RESULTS_PATH)) return [];
try {
switch (timeRange) { return JSON.parse(readFileSync(RESULTS_PATH, 'utf-8'));
case 'today': } catch (e) {
// 今日 return [];
startDate = `${year}-${month}-${day}`;
break;
case 'thisWeek': {
// 本周 (从周一开始)
const dayOfWeek = now.getDay(); // 0是周日,1是周一
const diff = dayOfWeek === 0 ? 6 : dayOfWeek - 1; // 计算到周一的天数差
const monday = new Date(now);
monday.setDate(now.getDate() - diff);
const weekYear = monday.getFullYear();
const weekMonth = String(monday.getMonth() + 1).padStart(2, '0');
const weekDay = String(monday.getDate()).padStart(2, '0');
startDate = `${weekYear}-${weekMonth}-${weekDay}`;
break;
}
case 'thisMonth':
default:
// 本月
startDate = `${year}-${month}-01`;
break;
} }
return { startDate, endDate };
} }
// 南京市公共资源交易平台 - 交通水务中标结果公示 function saveResults(results) {
const BASE_URL = 'https://njggzy.nanjing.gov.cn/njweb/jtsw/069008/'; writeFileSync(RESULTS_PATH, JSON.stringify(results, null, 2), 'utf-8');
}
// 南京市公共资源交易平台 - 交通水务招标公告 function appendResult(result) {
const BID_ANNOUNCE_BASE_URL = 'https://njggzy.nanjing.gov.cn/njweb/jtsw/069001/'; const results = readResults();
results.unshift({ ...result, id: `result-${Date.now()}-${Math.random().toString(36).slice(2, 7)}` });
if (results.length > 500) results.splice(500);
saveResults(results);
}
const http = axios.create({ // ========== 统一的公告抓取 Schema ==========
responseType: 'arraybuffer',
timeout: 15000, // 公告抓取 Schemaresult 包装数组)
headers: { const announcementSchema = z.object({
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', result: z.array(z.object({
}, title: z.string().describe('公告标题'),
amount: z.string().nullable().describe('项目金额(合同预估价/最高投标限价等没有则为null'),
date: z.string().describe('发布日期YYYY-MM-DD格式'),
url: z.string().describe('详情页完整URL以https://开头'),
})).describe('页面上提取到的所有公告条目'),
}); });
function pickEncoding(contentType = '') { /** 从 Firecrawl 返回结果中提取 result 数组 */
const match = /charset=([^;]+)/i.exec(contentType); function extractItems(raw) {
if (!match) return 'utf-8'; if (!raw) return [];
const charset = match[1].trim().toLowerCase(); const root = (raw.data && typeof raw.data === 'object') ? raw.data : raw;
if (charset.includes('gb')) return 'gbk'; if (Array.isArray(root.result)) return root.result;
return charset; if (root.result && typeof root.result === 'object') {
} const keys = Object.keys(root.result).filter(k => !isNaN(parseInt(k)));
if (keys.length > 0) return keys.sort((a, b) => parseInt(a) - parseInt(b)).map(k => root.result[k]);
async function fetchHtml(url) {
const res = await http.get(url);
const encoding = pickEncoding(res.headers['content-type']);
const html = iconv.decode(res.data, encoding || 'utf-8');
return html;
}
function getPageUrl(pageIndex) {
if (pageIndex === 1) {
return `${BASE_URL}moreinfosl3.html`;
} }
return `${BASE_URL}${pageIndex}.html`; if (Array.isArray(root)) return root;
const numericKeys = Object.keys(root).filter(k => !isNaN(parseInt(k)));
if (numericKeys.length > 0) return numericKeys.sort((a, b) => parseInt(a) - parseInt(b)).map(k => root[k]);
return [];
} }
// 解析列表页HTML提取中标结果信息 // ========== 抓取执行(复用 server.js 中 runScraper 的逻辑) ==========
function parseList(html) {
const $ = cheerio.load(html);
const items = [];
$('li.ewb-info-item2').each((_, row) => { async function runScraper(scraper) {
const $row = $(row); console.log(`[定时任务] ${scraper.city} - ${scraper.section} ${scraper.subsection} - ${scraper.type}${scraper.url}`);
const cells = $row.find('div.ewb-info-num2'); const fullPrompt = `访问这个URL: ${scraper.url}
【目标区域】:${scraper.section || ''} - ${scraper.subsection || ''}
【公告类型】:${scraper.type || ''}
if (cells.length >= 5) { ${scraper.prompt || '提取页面上今日的招标公告信息,包括:标题、项目金额(可能为合同预估价/最高投标限价等等、发布日期YYYY-MM-DD格式、详情页完整URL'}
const bidNo = $(cells[0]).find('p').attr('title') || $(cells[0]).find('p').text().trim();
const projectName = $(cells[1]).find('p').attr('title') || $(cells[1]).find('p').text().trim();
const bidName = $(cells[2]).find('p').attr('title') || $(cells[2]).find('p').text().trim();
const winningPrice = $(cells[3]).find('p').text().trim(); // 中标价格
const winningDate = $(cells[4]).find('p').text().trim(); // 中标日期
const onclick = $row.attr('onclick') || ''; 请严格按照定义的 JSON 格式返回,每条公告包含 title、amount、date、url 四个字段。`;
const hrefMatch = onclick.match(/window\.open\(['"]([^'"]+)['"]\)/);
let href = '';
if (hrefMatch) {
href = hrefMatch[1];
if (href.startsWith('/')) {
href = `https://njggzy.nanjing.gov.cn${href}`;
}
}
if (!/^\d{4}-\d{2}-\d{2}$/.test(winningDate)) return; const result = await firecrawl.agent({
prompt: fullPrompt,
const price = parseFloat(winningPrice); schema: announcementSchema,
if (isNaN(price)) return; model: scraper.model || 'spark-1-mini',
items.push({
bidNo,
title: projectName,
bidName,
winningBid: { // 中标金额
amount: price,
unit: '万元'
},
date: winningDate,
href
});
}
}); });
return items; console.log('[定时任务] 原始返回结果:', JSON.stringify(result).slice(0, 500));
// 标准化结果
const rawItems = extractItems(result);
const items = rawItems.map(item => ({
title: item.title || '',
amount: item.amount || null,
date: item.date || '',
url: item.url || '',
}));
console.log(`[定时任务] 提取到 ${items.length} 条公告`);
const record = {
scraperId: scraper.id,
city: scraper.city,
section: scraper.section,
subsection: scraper.subsection,
type: scraper.type,
url: scraper.url,
scrapedAt: new Date().toISOString(),
data: { result: items, total: items.length },
};
appendResult(record);
return record;
} }
/**
 * Report whether a date string lies inside an optional, inclusive date range.
 *
 * @param {string} dateStr - Date text handed to the `Date` constructor.
 * @param {string} [startDate] - Lower bound; skipped when falsy.
 * @param {string} [endDate] - Upper bound; skipped when falsy.
 * @returns {boolean} `false` when `dateStr` is empty or unparseable, or when it
 *   falls before `startDate` / after `endDate`; `true` otherwise.
 */
function isDateInRange(dateStr, startDate, endDate) {
  if (!dateStr) {
    return false;
  }

  const timestamp = new Date(dateStr).getTime();
  if (Number.isNaN(timestamp)) {
    return false;
  }

  // A bound that does not parse yields NaN, and every comparison with NaN is
  // false, so such a bound never rejects anything.
  const isTooEarly = Boolean(startDate) && timestamp < new Date(startDate).getTime();
  const isTooLate = Boolean(endDate) && timestamp > new Date(endDate).getTime();
  return !(isTooEarly || isTooLate);
}
/**
 * Walk the paginated list, starting at page 1, and collect every item whose
 * date passes `isDateInRange(item.date, startDate, endDate)`.
 *
 * Paging stops when `maxPages` pages have been visited, when a page parses to
 * zero items, when every item on a page is older than `startDate`, or when
 * fetching/parsing a page throws (the error is logged, not rethrown).
 * A 500 ms pause is inserted before each follow-up page request.
 *
 * @param {string} startDate - Lower bound of the range; falsy means unbounded.
 * @param {string} endDate - Upper bound of the range; falsy means unbounded.
 * @param {number} [maxPages=50] - Maximum number of list pages to visit.
 * @returns {Promise<Array<object>>} The matching items in page order.
 */
async function fetchListByDateRange(startDate, endDate, maxPages = 50) {
  const collected = [];
  let page = 1;
  let keepPaging = true;

  console.log(`开始按时间范围采集: ${startDate || '不限'}${endDate || '不限'}`);

  while (keepPaging && page <= maxPages) {
    const url = getPageUrl(page);
    console.log(`正在采集第 ${page} 页: ${url}`);

    try {
      const rows = parseList(await fetchHtml(url));

      if (rows.length === 0) {
        console.log(`${page} 页没有数据,停止采集`);
        break;
      }

      // Count the two outcomes that matter: rows inside the range, and rows
      // that are strictly older than the start date.
      let inRangeCount = 0;
      let tooOldCount = 0;
      for (const row of rows) {
        if (isDateInRange(row.date, startDate, endDate)) {
          collected.push(row);
          inRangeCount += 1;
        } else if (startDate && new Date(row.date) < new Date(startDate)) {
          tooOldCount += 1;
        }
      }

      // Every row being older than the start date means later pages are not
      // worth visiting. This can only happen when startDate is set.
      if (tooOldCount === rows.length) {
        console.log(`${page} 页所有项目都早于起始日期,停止采集`);
        keepPaging = false;
      }

      console.log(`${page} 页找到 ${rows.length} 条,符合条件 ${inRangeCount > 0 ? '有' : '无'}`);

      page += 1;

      if (keepPaging && page <= maxPages) {
        await new Promise((resolve) => setTimeout(resolve, 500));
      }
    } catch (err) {
      console.error(`采集第 ${page} 页失败: ${err.message}`);
      break;
    }
  }

  console.log(`总共采集了 ${page - 1} 页,找到 ${collected.length} 条符合条件的公告`);
  return collected;
}
// ========== 招标公告采集函数 ==========
/**
 * Build the URL of one page of the tender-announcement list.
 *
 * Page 1 lives under a fixed file name; any other index maps to
 * `<pageIndex>.html` under `BID_ANNOUNCE_BASE_URL`.
 *
 * @param {number} pageIndex - Page number to address.
 * @returns {string} URL of the requested list page.
 */
function getBidAnnouncePageUrl(pageIndex) {
  const fileName = pageIndex === 1 ? 'moreinfo5dc.html' : `${pageIndex}.html`;
  return `${BID_ANNOUNCE_BASE_URL}${fileName}`;
}
/**
 * Parse a tender-announcement list page into plain records.
 *
 * Each `li.ewb-info-item2` row contributes one record when its `onclick`
 * attribute contains a `window.open('<href>')` call and both a title and a
 * `YYYY-MM-DD` date can be read from its `.ewb-info-num2` cells (title from
 * the first cell, date from the last). Rows missing any of these are skipped.
 *
 * @param {string} html - Markup of the list page.
 * @returns {Array<{title: string, date: string, href: string, estimatedAmount: null}>}
 *   Records in document order; `estimatedAmount` is always `null` here.
 */
function parseBidAnnounceList(html) {
  const $ = cheerio.load(html);
  const announcements = [];

  $('li.ewb-info-item2').each((_, element) => {
    const $item = $(element);

    const openCall = ($item.attr('onclick') || '').match(/window\.open\(['"]([^'"]+)['"]\)/);
    if (!openCall) {
      return;
    }

    // Site-relative links are turned into absolute ones on the portal host.
    const rawHref = openCall[1];
    const href = rawHref.startsWith('/')
      ? `https://njggzy.nanjing.gov.cn${rawHref}`
      : rawHref;

    const $cells = $item.find('.ewb-info-num2');

    const $titleNode = $cells.first().find('p');
    const title = $titleNode.attr('title') || $titleNode.text().trim();

    const dateHit = $cells.last().find('p').text().trim().match(/\d{4}-\d{2}-\d{2}/);
    const date = dateHit ? dateHit[0] : '';

    if (!title || !date) {
      return;
    }

    announcements.push({
      title,
      date,
      href,
      estimatedAmount: null,
    });
  });

  return announcements;
}
/**
 * Fetch a tender-announcement detail page and extract key fields from the
 * text of its `<body>`.
 *
 * Every label may be followed by either an ASCII ":" or a full-width "：".
 * Fields whose pattern does not match are reported as `null`.
 *
 * @param {string} url - Address of the detail page.
 * @returns {Promise<{estimatedAmount: (number|null), bidCode: (string|null),
 *   tenderee: (string|null), duration: (number|null), url: string}
 *   | {estimatedAmount: null, url: string}>}
 *   The extracted fields. If fetching or parsing throws, the error is logged
 *   and the reduced `{ estimatedAmount: null, url }` object is returned.
 */
async function fetchBidAnnounceDetail(url) {
  try {
    const html = await fetchHtml(url);
    const $ = cheerio.load(html);
    const bodyText = $('body').text();

    // Estimated contract price, written with optional thousands separators.
    const amountMatch = bodyText.match(/合同估算价[:：]\s*([\d,]+\.?\d*)\s*元/);
    let estimatedAmount = null;
    if (amountMatch) {
      const parsedAmount = Number.parseFloat(amountMatch[1].replace(/,/g, ''));
      // A capture made only of separators parses to NaN; report it as missing.
      estimatedAmount = Number.isFinite(parsedAmount) ? parsedAmount : null;
    }

    const bidCodeMatch = bodyText.match(/标段编码[:：]\s*([A-Za-z0-9\-]+)/);
    const bidCode = bidCodeMatch ? bidCodeMatch[1] : null;

    // The name runs until whitespace, an ASCII or full-width comma, or "。".
    const tendereeMatch = bodyText.match(/招标人[为是][:：]?\s*([^\s,，。]+)/);
    const tenderee = tendereeMatch ? tendereeMatch[1] : null;

    const durationMatch = bodyText.match(/计划工期[:：]\s*(\d+)\s*日历天/);
    const duration = durationMatch ? Number.parseInt(durationMatch[1], 10) : null;

    return { estimatedAmount, bidCode, tenderee, duration, url };
  } catch (error) {
    console.error(`获取招标详情失败 ${url}: ${error.message}`);
    return { estimatedAmount: null, url };
  }
}
/**
 * Collect tender announcements whose date passes
 * `isDateInRange(item.date, startDate, endDate)`, then enrich each one with
 * fields from its detail page.
 *
 * List paging starts at page 1 and stops when `maxPages` pages have been
 * visited, when a page parses to zero items, when every item on a page is
 * older than `startDate`, or when fetching/parsing a page throws (logged, not
 * rethrown). A 500 ms pause precedes each follow-up page request.
 *
 * Afterwards `fetchBidAnnounceDetail(item.href)` is awaited for every
 * collected item, one at a time with a 300 ms pause in between, and its
 * `estimatedAmount`, `bidCode`, `tenderee` and `duration` are copied onto the
 * item. This phase is not wrapped in a try/catch here.
 *
 * @param {string} startDate - Lower bound of the range; falsy means unbounded.
 * @param {string} endDate - Upper bound of the range; falsy means unbounded.
 * @param {number} [maxPages=20] - Maximum number of list pages to visit.
 * @returns {Promise<Array<object>>} The enriched items in page order.
 */
async function fetchBidAnnounceByDateRange(startDate, endDate, maxPages = 20) {
  const collected = [];
  let page = 1;
  let keepPaging = true;

  console.log(`开始采集招标公告: ${startDate || '不限'}${endDate || '不限'}`);

  while (keepPaging && page <= maxPages) {
    const url = getBidAnnouncePageUrl(page);
    console.log(`正在采集招标公告第 ${page} 页: ${url}`);

    try {
      const rows = parseBidAnnounceList(await fetchHtml(url));

      if (rows.length === 0) {
        console.log(`${page} 页没有数据,停止采集`);
        break;
      }

      // Count rows inside the range and rows strictly older than the start date.
      let inRangeCount = 0;
      let tooOldCount = 0;
      for (const row of rows) {
        if (isDateInRange(row.date, startDate, endDate)) {
          collected.push(row);
          inRangeCount += 1;
        } else if (startDate && new Date(row.date) < new Date(startDate)) {
          tooOldCount += 1;
        }
      }

      // A page made up entirely of too-old rows ends the paging phase.
      if (tooOldCount === rows.length) {
        console.log(`${page} 页所有项目都早于起始日期,停止采集`);
        keepPaging = false;
      }

      console.log(`${page} 页找到 ${rows.length} 条,符合条件 ${inRangeCount > 0 ? '有' : '无'}`);

      page += 1;

      if (keepPaging && page <= maxPages) {
        await new Promise((resolve) => setTimeout(resolve, 500));
      }
    } catch (err) {
      console.error(`采集第 ${page} 页失败: ${err.message}`);
      break;
    }
  }

  console.log(`总共采集了 ${page - 1} 页,找到 ${collected.length} 条符合条件的招标公告`);

  if (collected.length === 0) {
    return collected;
  }

  // Detail phase: one request per announcement, strictly sequential.
  console.log(`开始获取 ${collected.length} 条招标公告的详情...`);
  const lastIndex = collected.length - 1;
  for (const [index, entry] of collected.entries()) {
    console.log(`获取详情 ${index + 1}/${collected.length}: ${entry.title.substring(0, 30)}...`);

    const { estimatedAmount, bidCode, tenderee, duration } = await fetchBidAnnounceDetail(entry.href);
    entry.estimatedAmount = estimatedAmount;
    entry.bidCode = bidCode;
    entry.tenderee = tenderee;
    entry.duration = duration;

    if (index < lastIndex) {
      await new Promise((resolve) => setTimeout(resolve, 300));
    }
  }
  console.log('招标公告详情获取完成');

  return collected;
}
// 定时任务执行函数
async function executeScheduledTask(config) { async function executeScheduledTask(config) {
try { try {
console.log('========================================'); console.log('========================================');
console.log('定时任务开始执行(综合采集)'); console.log('定时任务开始执行');
console.log('执行时间:', new Date().toLocaleString('zh-CN')); console.log('执行时间:', new Date().toLocaleString('zh-CN'));
console.log('========================================'); console.log('========================================');
const timeRange = config.scheduler.timeRange || 'thisMonth'; // 获取所有已启用的抓取来源
const { startDate, endDate } = getDateRangeByType(timeRange); const scrapers = (config.scrapers || []).filter(s => s.enabled);
const winningThreshold = config.scheduler.winningThreshold !== undefined ? config.scheduler.winningThreshold : 10000; // 中标阈值默认1亿(10000万元)
const bidThreshold = config.scheduler.bidThreshold !== undefined ? config.scheduler.bidThreshold : 0; // 招标阈值默认0(不筛选)
const timeRangeNames = { if (scrapers.length === 0) {
'today': '今日', console.log('没有已启用的抓取来源,跳过');
'thisWeek': '本周',
'thisMonth': '本月'
};
console.log(`采集时间段: ${timeRangeNames[timeRange] || '本月'}`);
console.log(`采集时间范围: ${startDate}${endDate}`);
console.log(`中标金额阈值: ${winningThreshold}万元 (${(winningThreshold / 10000).toFixed(2)}亿元)`);
console.log(`招标金额阈值: ${bidThreshold}万元 ${bidThreshold === 0 ? '(不筛选)' : `(${(bidThreshold / 10000).toFixed(2)}亿元)`}`);
// ========== 1. 采集中标公示 ==========
console.log('\n========== 采集中标公示 ==========');
const winningItems = await fetchListByDateRange(startDate, endDate, 50);
// 筛选大于阈值的中标项目
const winningFiltered = winningItems.filter((item) => {
return item.winningBid && item.winningBid.amount > winningThreshold;
});
const winningTotal = winningFiltered.reduce(
(sum, item) => sum + (item.winningBid?.amount || 0),
0
);
console.log(`中标公示: 采集 ${winningItems.length} 条,符合阈值 ${winningFiltered.length}`);
// 生成中标报告
const winningReport = {
summary: {
total_count: winningItems.length,
filtered_count: winningFiltered.length,
threshold: `${winningThreshold}万元`,
total_amount: `${winningTotal.toFixed(2)}万元`,
generated_at: new Date().toISOString(),
date_range: { startDate, endDate },
},
projects: winningFiltered.map((item) => ({
bidNo: item.bidNo,
title: item.title,
bidName: item.bidName,
date: item.date,
winningBid: item.winningBid,
url: item.href,
})),
};
// ========== 2. 采集招标公告 ==========
console.log('\n========== 采集招标公告 ==========');
const bidItems = await fetchBidAnnounceByDateRange(startDate, endDate, 20);
// 筛选招标项目根据阈值筛选阈值为0时不筛选只要求有金额
const bidFiltered = bidItems.filter(item => {
if (!item.estimatedAmount) return false;
if (bidThreshold === 0) return true; // 阈值为0时不筛选
return item.estimatedAmount / 10000 > bidThreshold; // 估算价是元,阈值是万元,需要转换
});
const bidTotal = bidFiltered.reduce(
(sum, item) => sum + (item.estimatedAmount || 0),
0
);
console.log(`招标公告: 采集 ${bidItems.length} 条,有金额 ${bidFiltered.length}`);
// 生成招标报告
const bidReport = {
summary: {
total_count: bidItems.length,
filtered_count: bidFiltered.length,
has_amount_count: bidFiltered.length,
threshold: bidThreshold === 0 ? '无' : `${bidThreshold}万元`,
total_amount: `${(bidTotal / 10000).toFixed(2)}万元`,
total_amount_yuan: bidTotal,
generated_at: new Date().toISOString(),
date_range: { startDate, endDate },
report_type: '招标公告'
},
projects: bidFiltered.map((item) => ({
title: item.title,
bidCode: item.bidCode,
tenderee: item.tenderee,
date: item.date,
duration: item.duration,
estimatedAmount: item.estimatedAmount ? {
amount: item.estimatedAmount,
amountWan: (item.estimatedAmount / 10000).toFixed(2),
unit: '元'
} : null,
url: item.href,
})),
};
// ========== 3. 检查是否有数据需要发送 ==========
if (winningFiltered.length === 0 && bidFiltered.length === 0) {
console.log('\n========================================');
console.log('暂无符合条件的项目,不发送邮件');
console.log('========================================');
return; return;
} }
// ========== 4. 发送综合邮件 ========== console.log(`${scrapers.length} 个已启用的抓取来源`);
console.log('\n========================================');
console.log('正在发送综合报告邮件...');
const emailConfig = config.email;
const result = await sendCombinedReportEmail(emailConfig, winningReport, bidReport); // 逐个运行抓取任务
const results = [];
for (const scraper of scrapers) {
try {
console.log(`\n---------- 抓取: ${scraper.city} - ${scraper.section} ${scraper.type} ----------`);
const r = await runScraper(scraper);
results.push(r);
console.log(`✓ 抓取成功`);
} catch (err) {
console.error(`✗ 抓取失败: ${err.message}`);
const errRecord = {
scraperId: scraper.id,
city: scraper.city,
section: scraper.section,
subsection: scraper.subsection,
type: scraper.type,
url: scraper.url,
scrapedAt: new Date().toISOString(),
error: err.message,
data: null,
};
appendResult(errRecord);
results.push(errRecord);
}
}
const successCount = results.filter(r => !r.error).length;
const failCount = results.filter(r => r.error).length;
console.log(`\n========== 抓取完成 ==========`);
console.log(`成功: ${successCount} 条,失败: ${failCount}`);
// 检查是否需要发送邮件
if (successCount === 0) {
console.log('没有成功的抓取结果,不发送邮件');
return;
}
// 发送邮件报告
if (config.email?.smtpHost && config.email?.smtpUser) {
console.log('\n正在发送抓取结果邮件...');
try {
const emailResult = await sendScraperResultsEmail(config.email, results);
console.log('邮件发送成功! MessageId:', emailResult.messageId);
} catch (emailErr) {
console.error('邮件发送失败:', emailErr.message);
}
} else {
console.log('邮件配置不完整,跳过邮件发送');
}
console.log('邮件发送成功!');
console.log('收件人:', emailConfig.recipients);
console.log('MessageId:', result.messageId);
console.log(`内容: 中标公示 ${winningFiltered.length} 条,招标公告 ${bidFiltered.length}`);
console.log('========================================');
console.log('定时任务执行完成');
console.log('========================================'); console.log('========================================');
} catch (error) { } catch (error) {
@@ -511,96 +202,60 @@ async function executeScheduledTask(config) {
// 存储当前的定时任务 // 存储当前的定时任务
let currentScheduledTask = null; let currentScheduledTask = null;
// 初始化定时任务
export function initScheduler() { export function initScheduler() {
const config = loadConfig(); const config = loadConfig();
if (!config) { console.error('无法启动定时任务: 配置文件加载失败'); return; }
if (!config) { if (!config.scheduler?.enabled) { console.log('定时任务已禁用'); return; }
console.error('无法启动定时任务: 配置文件加载失败');
return;
}
if (!config.scheduler || !config.scheduler.enabled) {
console.log('定时任务已禁用');
return;
}
if (!config.email || !config.email.smtpHost || !config.email.smtpUser) {
console.error('无法启动定时任务: 邮件配置不完整');
console.error('请在 config.json 中配置邮件信息');
return;
}
const cronTime = config.scheduler.cronTime || '0 9 * * *'; const cronTime = config.scheduler.cronTime || '0 9 * * *';
const enabledCount = (config.scrapers || []).filter(s => s.enabled).length;
console.log('========================================'); console.log('========================================');
console.log('定时任务已启动'); console.log('定时任务已启动,执行计划:', cronTime);
console.log('执行计划:', cronTime); console.log(`已启用的抓取来源: ${enabledCount}`);
console.log('中标阈值:', config.scheduler.winningThreshold, '万元'); if (config.email?.recipients) console.log('收件人:', config.email.recipients);
console.log('招标阈值:', config.scheduler.bidThreshold, '万元', config.scheduler.bidThreshold === 0 ? '(不筛选)' : '');
console.log('收件人:', config.email.recipients);
console.log('========================================'); console.log('========================================');
// 如果已有任务在运行,先停止 if (currentScheduledTask) { currentScheduledTask.stop(); }
if (currentScheduledTask) {
currentScheduledTask.stop();
console.log('已停止旧的定时任务');
}
// 创建定时任务
currentScheduledTask = cron.schedule(cronTime, () => { currentScheduledTask = cron.schedule(cronTime, () => {
executeScheduledTask(config); // 每次执行时重新加载配置,确保使用最新的 scrapers
}, { const latestConfig = loadConfig();
timezone: 'Asia/Shanghai' if (latestConfig) {
}); executeScheduledTask(latestConfig);
}
}, { timezone: 'Asia/Shanghai' });
} }
// 重新加载配置并重启定时任务
export function reloadScheduler() { export function reloadScheduler() {
console.log('重新加载定时任务配置...'); console.log('重新加载定时任务配置...');
if (currentScheduledTask) { currentScheduledTask.stop(); currentScheduledTask = null; }
// 停止当前任务
if (currentScheduledTask) {
currentScheduledTask.stop();
currentScheduledTask = null;
console.log('已停止当前定时任务');
}
// 重新初始化
initScheduler(); initScheduler();
} }
// 停止定时任务
export function stopScheduler() { export function stopScheduler() {
if (currentScheduledTask) { if (currentScheduledTask) {
currentScheduledTask.stop(); currentScheduledTask.stop(); currentScheduledTask = null;
currentScheduledTask = null; console.log('定时任务已停止'); return true;
console.log('定时任务已停止');
return true;
} }
return false; return false;
} }
// 获取定时任务状态
export function getSchedulerStatus() { export function getSchedulerStatus() {
const config = loadConfig(); const config = loadConfig();
const enabledScrapers = (config?.scrapers || []).filter(s => s.enabled).length;
return { return {
isRunning: currentScheduledTask !== null, isRunning: currentScheduledTask !== null,
enabledScrapers,
config: config ? { config: config ? {
enabled: config.scheduler?.enabled || false, enabled: config.scheduler?.enabled || false,
cronTime: config.scheduler?.cronTime || '0 9 * * *', cronTime: config.scheduler?.cronTime || '0 9 * * *',
winningThreshold: config.scheduler?.winningThreshold !== undefined ? config.scheduler.winningThreshold : 10000, description: config.scheduler?.description || '',
bidThreshold: config.scheduler?.bidThreshold !== undefined ? config.scheduler.bidThreshold : 0,
timeRange: config.scheduler?.timeRange || 'thisMonth',
} : null, } : null,
}; };
} }
// 手动执行任务(用于测试)
export async function runTaskNow() { export async function runTaskNow() {
const config = loadConfig(); const config = loadConfig();
if (!config) { if (!config) throw new Error('配置文件加载失败');
throw new Error('配置文件加载失败');
}
await executeScheduledTask(config); await executeScheduledTask(config);
} }

File diff suppressed because it is too large Load Diff