feat: 使用firecrawl 实现公告抓取与分析工具的网页界面,包括报告生成、导出和邮件发送功能。

This commit is contained in:
2026-03-06 15:37:56 +08:00
parent e3766b86be
commit ad659c4ff0
11 changed files with 3190 additions and 1490 deletions

View File

@@ -730,3 +730,166 @@ function generateReportHtml(report) {
</html>
`;
}
// ========== Generic scraper-results email (used by the scheduled task) ==========
/**
 * Render the given scraper results as an HTML report and send it by SMTP.
 *
 * @param {object} emailConfig - Mail settings; reads `smtpHost`, `smtpPort`,
 *   `smtpUser`, `smtpPass` and `recipients`.
 * @param {Array<object>} results - Scraper records; a record with a truthy
 *   `error` property counts as failed, everything else as successful.
 * @returns {Promise<{success: boolean, messageId: string}>} Resolves once the
 *   transport has accepted the message.
 * @throws {Error} Wraps any failure (rendering or sending); the original error
 *   is attached as `cause`.
 */
export async function sendScraperResultsEmail(emailConfig, results) {
  try {
    // The port may arrive as a string (e.g. "465" from JSON or a form field).
    // Normalise it so the strict comparison used for `secure` cannot silently
    // evaluate to false for a string port.
    const port = Number(emailConfig.smtpPort) || 587;
    const transporter = nodemailer.createTransport({
      host: emailConfig.smtpHost,
      port,
      secure: port === 465,
      auth: {
        user: emailConfig.smtpUser,
        pass: emailConfig.smtpPass,
      },
    });
    const htmlContent = generateScraperResultsHtml(results);
    const successCount = results.filter((r) => !r.error).length;
    const info = await transporter.sendMail({
      from: `"公告采集系统" <${emailConfig.smtpUser}>`,
      to: emailConfig.recipients,
      subject: `公告采集结果报告(${successCount}条) - ${new Date().toLocaleDateString('zh-CN')}`,
      html: htmlContent,
    });
    return { success: true, messageId: info.messageId };
  } catch (error) {
    console.error('发送抓取结果邮件失败:', error);
    // Keep the underlying error reachable for callers that want to inspect it.
    throw new Error(`邮件发送失败: ${error.message}`, { cause: error });
  }
}
/**
 * Build the HTML body of the scraper-results report email.
 *
 * Successful records (no `error`) contribute one table row per entry of
 * `record.data.result`; failed records are listed in a separate block below
 * the table. Rows are sorted by their `date` string in descending order.
 *
 * All values taken from the records are HTML-escaped before interpolation
 * because they originate from scraped pages, and a detail link is only
 * rendered for http(s) URLs.
 *
 * @param {Array<object>} results - Scraper records as produced by the scheduler.
 * @returns {string} A complete HTML document.
 */
function generateScraperResultsHtml(results) {
  // Escape text for safe use in both element content and quoted attributes.
  const esc = (value) => String(value ?? '')
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');

  // Only absolute http(s) links are allowed into href; anything else
  // (javascript:, data:, relative paths) yields '' and is rendered as "-".
  const toSafeHref = (url) => {
    const trimmed = String(url ?? '').trim();
    return /^https?:\/\//i.test(trimmed) ? esc(trimmed) : '';
  };

  const successResults = results.filter((r) => !r.error);
  const failResults = results.filter((r) => r.error);
  const generatedAt = new Date().toLocaleString('zh-CN');

  // Flatten the items of every successful source, tagging each with its source info.
  const allRows = [];
  for (const r of successResults) {
    // Ignore malformed payloads instead of iterating a non-array value.
    const items = Array.isArray(r.data?.result) ? r.data.result : [];
    for (const item of items) {
      allRows.push({
        section: [r.section, r.subsection].filter(Boolean).join(' · ') || r.city || '-',
        type: r.type || '-',
        title: item?.title || '-',
        date: item?.date || '-',
        amount: item?.amount || '未公开',
        url: item?.url || '',
      });
    }
  }

  // Newest first (plain string comparison of the date field).
  allRows.sort((a, b) => {
    if (a.date === b.date) return 0;
    return a.date > b.date ? -1 : 1;
  });

  const totalItems = allRows.length;
  const withAmountCount = allRows.filter((row) => row.amount && row.amount !== '未公开').length;

  // Table body with alternating row colours, or a placeholder row when empty.
  const rowHtml = allRows.length === 0
    ? `<tr><td colspan="6" style="text-align:center;color:#999;padding:30px;font-size:14px;">暂无数据</td></tr>`
    : allRows.map((row, i) => {
      const href = toSafeHref(row.url);
      const linkCell = href
        ? `<a href="${href}" target="_blank" style="color:#667eea;font-size:12px;text-decoration:none;white-space:nowrap;">查看 →</a>`
        : '<span style="color:#ccc;font-size:12px;">-</span>';
      return `
<tr style="background:${i % 2 === 0 ? '#fff' : '#f7f8ff'};">
<td style="padding:9px 12px;border-bottom:1px solid #eaecf5;white-space:nowrap;color:#555;font-size:13px;">${esc(row.section)}</td>
<td style="padding:9px 12px;border-bottom:1px solid #eaecf5;white-space:nowrap;">
<span style="display:inline-block;padding:2px 8px;background:#e8f4fd;color:#1a73c8;border-radius:10px;font-size:11px;font-weight:600;">${esc(row.type)}</span>
</td>
<td style="padding:9px 12px;border-bottom:1px solid #eaecf5;font-size:13px;max-width:320px;">${esc(row.title)}</td>
<td style="padding:9px 12px;border-bottom:1px solid #eaecf5;white-space:nowrap;font-size:13px;color:#555;">${esc(row.date)}</td>
<td style="padding:9px 12px;border-bottom:1px solid #eaecf5;white-space:nowrap;font-size:13px;font-weight:600;color:${row.amount === '未公开' ? '#aaa' : '#e67e22'};">${esc(row.amount)}</td>
<td style="padding:9px 12px;border-bottom:1px solid #eaecf5;text-align:center;">
${linkCell}
</td>
</tr>`;
    }).join('');

  // List of sources whose scrape failed (omitted entirely when there are none).
  const failHtml = failResults.length === 0 ? '' : `
<div style="margin-top:24px;">
<div style="font-size:14px;font-weight:600;color:#c0392b;margin-bottom:10px;">⚠️ 抓取失败的来源(${failResults.length} 个)</div>
${failResults.map((r) => `
<div style="background:#fdeaea;border-left:3px solid #e74c3c;padding:10px 14px;border-radius:4px;margin-bottom:8px;font-size:13px;">
<strong>${esc(r.city || '')}${r.section ? ' · ' + esc(r.section) : ''}${r.type ? ' · ' + esc(r.type) : ''}</strong>
<div style="color:#999;font-size:12px;margin-top:4px;">${esc(r.url)}</div>
<div style="color:#c0392b;margin-top:4px;">❌ ${esc(r.error)}</div>
</div>`).join('')}
</div>`;

  return `
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>公告采集结果报告</title>
</head>
<body style="font-family:'PingFang SC','Microsoft YaHei',Arial,sans-serif;line-height:1.6;color:#333;margin:0;padding:20px;background:#f0f2f8;">
<div style="max-width:960px;margin:0 auto;background:white;border-radius:10px;overflow:hidden;box-shadow:0 4px 20px rgba(0,0,0,.1);">
<!-- 标题栏 -->
<div style="background:linear-gradient(135deg,#667eea 0%,#764ba2 100%);padding:24px 30px;color:white;">
<h1 style="margin:0;font-size:20px;font-weight:700;">📋 公告采集结果报告</h1>
<div style="margin-top:6px;opacity:.85;font-size:13px;">生成时间:${generatedAt}</div>
</div>
<!-- 统计栏 -->
<div style="display:flex;gap:0;border-bottom:1px solid #eaecf5;">
<div style="flex:1;padding:16px 24px;text-align:center;border-right:1px solid #eaecf5;">
<div style="font-size:28px;font-weight:700;color:#667eea;">${totalItems}</div>
<div style="font-size:12px;color:#888;margin-top:2px;">公告总数</div>
</div>
<div style="flex:1;padding:16px 24px;text-align:center;border-right:1px solid #eaecf5;">
<div style="font-size:28px;font-weight:700;color:#1a8a4a;">${successResults.length}</div>
<div style="font-size:12px;color:#888;margin-top:2px;">成功来源</div>
</div>
<div style="flex:1;padding:16px 24px;text-align:center;border-right:1px solid #eaecf5;">
<div style="font-size:28px;font-weight:700;color:#e67e22;">${withAmountCount}</div>
<div style="font-size:12px;color:#888;margin-top:2px;">有金额</div>
</div>
<div style="flex:1;padding:16px 24px;text-align:center;">
<div style="font-size:28px;font-weight:700;color:${failResults.length > 0 ? '#c0392b' : '#aaa'};">${failResults.length}</div>
<div style="font-size:12px;color:#888;margin-top:2px;">失败来源</div>
</div>
</div>
<!-- 公告汇总表格 -->
<div style="padding:24px 30px;">
<div style="font-size:15px;font-weight:600;color:#333;margin-bottom:14px;">公告汇总(共 ${totalItems} 条)</div>
<div style="overflow-x:auto;">
<table style="width:100%;border-collapse:collapse;font-size:13px;">
<thead>
<tr style="background:linear-gradient(135deg,#667eea 0%,#764ba2 100%);color:white;">
<th style="padding:10px 12px;text-align:left;font-weight:600;white-space:nowrap;">板块</th>
<th style="padding:10px 12px;text-align:left;font-weight:600;white-space:nowrap;">类型</th>
<th style="padding:10px 12px;text-align:left;font-weight:600;">公告标题</th>
<th style="padding:10px 12px;text-align:left;font-weight:600;white-space:nowrap;">发布日期</th>
<th style="padding:10px 12px;text-align:left;font-weight:600;white-space:nowrap;">项目金额</th>
<th style="padding:10px 12px;text-align:center;font-weight:600;white-space:nowrap;">详情</th>
</tr>
</thead>
<tbody>
${rowHtml}
</tbody>
</table>
</div>
${failHtml}
<div style="margin-top:24px;padding-top:16px;border-top:1px solid #eaecf5;color:#aaa;font-size:12px;text-align:center;">
本报告由公告采集系统自动生成 · ${generatedAt}
</div>
</div>
</div>
</body>
</html>
`;
}

View File

@@ -1,503 +1,194 @@
import 'dotenv/config';
import cron from 'node-cron';
import { readFileSync } from 'fs';
import { readFileSync, writeFileSync, existsSync } from 'fs';
import { fileURLToPath } from 'url';
import { dirname, join } from 'path';
import axios from 'axios';
import * as cheerio from 'cheerio';
import iconv from 'iconv-lite';
import { sendCombinedReportEmail } from './emailService.js';
import Firecrawl from '@mendable/firecrawl-js';
import { z } from 'zod';
import { sendScraperResultsEmail } from './emailService.js';
// Derive __filename / __dirname for this ES module from import.meta.url.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
// Initialise the Firecrawl client; the API key is read from the FIRECRAWL_API_KEY environment variable.
const firecrawl = new Firecrawl({ apiKey: process.env.FIRECRAWL_API_KEY });
// Path of results.json, located one directory above this file.
const RESULTS_PATH = join(__dirname, '..', 'results.json');
/**
 * Load and parse config.json from the directory above this file.
 *
 * @returns {object|null} The parsed configuration, or null when the file
 *   cannot be read or parsed (the failure is logged).
 */
function loadConfig() {
  const configFile = () => join(__dirname, '..', 'config.json');
  try {
    const rawJson = readFileSync(configFile(), 'utf-8');
    return JSON.parse(rawJson);
  } catch (error) {
    console.error('加载配置文件失败:', error.message);
    console.error('请确保 config.json 文件存在并配置正确');
    return null;
  }
}
// 根据时间范围类型获取开始和结束日期
function getDateRangeByType(timeRange) {
const now = new Date();
const year = now.getFullYear();
const month = String(now.getMonth() + 1).padStart(2, '0');
const day = String(now.getDate()).padStart(2, '0');
// ========== 结果存取(与 server.js 保持一致) ==========
let startDate, endDate;
endDate = `${year}-${month}-${day}`; // 结束日期都是今天
switch (timeRange) {
case 'today':
// 今日
startDate = `${year}-${month}-${day}`;
break;
case 'thisWeek': {
// 本周 (从周一开始)
const dayOfWeek = now.getDay(); // 0是周日,1是周一
const diff = dayOfWeek === 0 ? 6 : dayOfWeek - 1; // 计算到周一的天数差
const monday = new Date(now);
monday.setDate(now.getDate() - diff);
const weekYear = monday.getFullYear();
const weekMonth = String(monday.getMonth() + 1).padStart(2, '0');
const weekDay = String(monday.getDate()).padStart(2, '0');
startDate = `${weekYear}-${weekMonth}-${weekDay}`;
break;
}
case 'thisMonth':
default:
// 本月
startDate = `${year}-${month}-01`;
break;
/**
 * Read the stored scraper results from RESULTS_PATH.
 *
 * Best-effort: a missing file, unreadable/invalid JSON, or JSON that is not an
 * array all yield an empty list, so callers can always treat the return value
 * as an array.
 *
 * @returns {Array<object>} The stored result records (possibly empty).
 */
function readResults() {
  if (!existsSync(RESULTS_PATH)) return [];
  try {
    const parsed = JSON.parse(readFileSync(RESULTS_PATH, 'utf-8'));
    // appendResult() calls unshift() on the return value, so never hand back a non-array.
    return Array.isArray(parsed) ? parsed : [];
  } catch (e) {
    // Still best-effort, but leave a trace instead of swallowing the failure silently.
    console.warn('读取结果文件失败,已按空列表处理:', e.message);
    return [];
  }
}
// Nanjing Public Resources Trading Platform - transport & water affairs: winning-bid result notices (base URL of the list pages)
const BASE_URL = 'https://njggzy.nanjing.gov.cn/njweb/jtsw/069008/';
/**
 * Overwrite RESULTS_PATH with the given results, serialised as
 * 2-space-indented JSON in UTF-8.
 *
 * @param {Array<object>} results - Records to persist.
 */
function saveResults(results) {
  const serialized = JSON.stringify(results, null, 2);
  writeFileSync(RESULTS_PATH, serialized, 'utf-8');
}
// Nanjing Public Resources Trading Platform - transport & water affairs: tender announcements (base URL of the list pages)
const BID_ANNOUNCE_BASE_URL = 'https://njggzy.nanjing.gov.cn/njweb/jtsw/069001/';
/**
 * Prepend one record to the stored results and persist the list.
 *
 * The record is copied and given a generated `id` (timestamp plus a short
 * random suffix); only the 500 newest records are kept.
 *
 * @param {object} result - The scraper record to store.
 */
function appendResult(result) {
  const stored = readResults();
  const randomSuffix = Math.random().toString(36).slice(2, 7);
  const entry = { ...result, id: `result-${Date.now()}-${randomSuffix}` };
  stored.unshift(entry);
  // Drop everything beyond the newest 500 entries.
  if (stored.length > 500) {
    stored.length = 500;
  }
  saveResults(stored);
}
const http = axios.create({
responseType: 'arraybuffer',
timeout: 15000,
headers: {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
},
// ========== Unified announcement-scraping schema ==========
// Zod schema for the scraped announcements: an object whose `result` property
// wraps the array of entries. Each entry carries a title, a nullable amount
// string, a publication date and a detail-page URL.
const announcementSchema = z.object({
result: z.array(z.object({
title: z.string().describe('公告标题'),
amount: z.string().nullable().describe('项目金额(合同预估价/最高投标限价等没有则为null'),
date: z.string().describe('发布日期YYYY-MM-DD格式'),
url: z.string().describe('详情页完整URL以https://开头'),
})).describe('页面上提取到的所有公告条目'),
});
/**
 * Pick a decoder name from a Content-Type header value.
 *
 * @param {string} [contentType=''] - Raw Content-Type header.
 * @returns {string} 'utf-8' when no charset parameter is present, 'gbk' for
 *   any charset containing "gb", otherwise the lower-cased charset itself.
 */
function pickEncoding(contentType = '') {
  const charsetParam = /charset=([^;]+)/i.exec(contentType);
  if (charsetParam === null) {
    return 'utf-8';
  }
  const declared = charsetParam[1].trim().toLowerCase();
  return declared.includes('gb') ? 'gbk' : declared;
}
/**
 * Download a page through the shared `http` client and decode the raw bytes
 * using the charset announced in the response's Content-Type header.
 *
 * @param {string} url - Page to fetch.
 * @returns {Promise<string>} The decoded HTML text.
 */
async function fetchHtml(url) {
  const response = await http.get(url);
  const charset = pickEncoding(response.headers['content-type']) || 'utf-8';
  return iconv.decode(response.data, charset);
}
function getPageUrl(pageIndex) {
if (pageIndex === 1) {
return `${BASE_URL}moreinfosl3.html`;
/**
 * Extract the array of announcement items from a Firecrawl response.
 *
 * Shapes accepted, checked in this order (on `raw.data` when that is an
 * object, otherwise on `raw` itself):
 *   1. `{ result: [...] }`
 *   2. `{ result: { "0": item, "1": item, ... } }` (array-like object)
 *   3. a bare array
 *   4. an array-like object with numeric keys
 *
 * @param {*} raw - The value returned by the Firecrawl call.
 * @returns {Array} The extracted items, or an empty array when none are found
 *   or when `raw` is not an object.
 */
function extractItems(raw) {
  // Collect the values stored under numeric-looking keys, ordered by key.
  const collectIndexed = (obj) => Object.keys(obj)
    .filter((key) => !Number.isNaN(Number.parseInt(key, 10)))
    .sort((a, b) => Number.parseInt(a, 10) - Number.parseInt(b, 10))
    .map((key) => obj[key]);

  // Primitives (e.g. a string) must not be split into index/character pairs.
  if (!raw || typeof raw !== 'object') return [];

  const root = (raw.data && typeof raw.data === 'object') ? raw.data : raw;
  if (Array.isArray(root.result)) return root.result;
  if (root.result && typeof root.result === 'object') {
    const fromResult = collectIndexed(root.result);
    if (fromResult.length > 0) return fromResult;
  }
  if (Array.isArray(root)) return root;
  return collectIndexed(root);
}
// 解析列表页HTML提取中标结果信息
function parseList(html) {
const $ = cheerio.load(html);
const items = [];
// ========== 抓取执行(复用 server.js 中 runScraper 的逻辑) ==========
$('li.ewb-info-item2').each((_, row) => {
const $row = $(row);
const cells = $row.find('div.ewb-info-num2');
async function runScraper(scraper) {
console.log(`[定时任务] ${scraper.city} - ${scraper.section} ${scraper.subsection} - ${scraper.type}${scraper.url}`);
const fullPrompt = `访问这个URL: ${scraper.url}
【目标区域】:${scraper.section || ''} - ${scraper.subsection || ''}
【公告类型】:${scraper.type || ''}
if (cells.length >= 5) {
const bidNo = $(cells[0]).find('p').attr('title') || $(cells[0]).find('p').text().trim();
const projectName = $(cells[1]).find('p').attr('title') || $(cells[1]).find('p').text().trim();
const bidName = $(cells[2]).find('p').attr('title') || $(cells[2]).find('p').text().trim();
const winningPrice = $(cells[3]).find('p').text().trim(); // 中标价格
const winningDate = $(cells[4]).find('p').text().trim(); // 中标日期
${scraper.prompt || '提取页面上今日的招标公告信息,包括:标题、项目金额(可能为合同预估价/最高投标限价等等、发布日期YYYY-MM-DD格式、详情页完整URL'}
const onclick = $row.attr('onclick') || '';
const hrefMatch = onclick.match(/window\.open\(['"]([^'"]+)['"]\)/);
let href = '';
if (hrefMatch) {
href = hrefMatch[1];
if (href.startsWith('/')) {
href = `https://njggzy.nanjing.gov.cn${href}`;
}
}
请严格按照定义的 JSON 格式返回,每条公告包含 title、amount、date、url 四个字段。`;
if (!/^\d{4}-\d{2}-\d{2}$/.test(winningDate)) return;
const price = parseFloat(winningPrice);
if (isNaN(price)) return;
items.push({
bidNo,
title: projectName,
bidName,
winningBid: { // 中标金额
amount: price,
unit: '万元'
},
date: winningDate,
href
});
}
const result = await firecrawl.agent({
prompt: fullPrompt,
schema: announcementSchema,
model: scraper.model || 'spark-1-mini',
});
return items;
console.log('[定时任务] 原始返回结果:', JSON.stringify(result).slice(0, 500));
// 标准化结果
const rawItems = extractItems(result);
const items = rawItems.map(item => ({
title: item.title || '',
amount: item.amount || null,
date: item.date || '',
url: item.url || '',
}));
console.log(`[定时任务] 提取到 ${items.length} 条公告`);
const record = {
scraperId: scraper.id,
city: scraper.city,
section: scraper.section,
subsection: scraper.subsection,
type: scraper.type,
url: scraper.url,
scrapedAt: new Date().toISOString(),
data: { result: items, total: items.length },
};
appendResult(record);
return record;
}
/**
 * Check whether a date string falls inside an inclusive range.
 *
 * @param {string} dateStr - Date to test; empty or unparsable values are never in range.
 * @param {string} [startDate] - Lower bound; skipped when falsy.
 * @param {string} [endDate] - Upper bound; skipped when falsy.
 * @returns {boolean} True when `dateStr` parses and violates neither bound.
 */
function isDateInRange(dateStr, startDate, endDate) {
  if (!dateStr) return false;
  const candidate = new Date(dateStr);
  if (Number.isNaN(candidate.getTime())) return false;
  const isBeforeStart = Boolean(startDate) && candidate < new Date(startDate);
  const isAfterEnd = Boolean(endDate) && candidate > new Date(endDate);
  return !(isBeforeStart || isAfterEnd);
}
/**
 * Walk the paginated list (via getPageUrl / fetchHtml / parseList) and collect
 * every item whose date lies inside [startDate, endDate].
 *
 * Paging stops when a page is empty, when a request or parse fails, when every
 * item on a page is older than `startDate`, or after `maxPages` pages. There is
 * a 500 ms pause between page requests.
 *
 * @param {string} [startDate] - Lower bound; falsy means unbounded.
 * @param {string} [endDate] - Upper bound; falsy means unbounded.
 * @param {number} [maxPages=50] - Maximum number of pages to request.
 * @returns {Promise<Array<object>>} Matching items in page order.
 */
async function fetchListByDateRange(startDate, endDate, maxPages = 50) {
  const collected = [];
  let page = 1;
  let keepPaging = true;
  console.log(`开始按时间范围采集: ${startDate || '不限'}${endDate || '不限'}`);

  while (keepPaging && page <= maxPages) {
    const listUrl = getPageUrl(page);
    console.log(`正在采集第 ${page} 页: ${listUrl}`);
    try {
      const items = parseList(await fetchHtml(listUrl));
      if (items.length === 0) {
        console.log(`${page} 页没有数据,停止采集`);
        break;
      }

      let matchedCount = 0;
      let olderCount = 0;
      for (const item of items) {
        if (isDateInRange(item.date, startDate, endDate)) {
          collected.push(item);
          matchedCount += 1;
        } else if (startDate && new Date(item.date) < new Date(startDate)) {
          olderCount += 1;
        }
      }

      // Only when every single item predates the start date is it safe to stop paging.
      if (olderCount === items.length) {
        console.log(`${page} 页所有项目都早于起始日期,停止采集`);
        keepPaging = false;
      }
      console.log(`${page} 页找到 ${items.length} 条,符合条件 ${matchedCount > 0 ? '有' : '无'}`);

      page += 1;
      if (keepPaging && page <= maxPages) {
        await new Promise((resolve) => setTimeout(resolve, 500));
      }
    } catch (err) {
      console.error(`采集第 ${page} 页失败: ${err.message}`);
      break;
    }
  }

  console.log(`总共采集了 ${page - 1} 页,找到 ${collected.length} 条符合条件的公告`);
  return collected;
}
// ========== Tender-announcement collection ==========
/**
 * Build the URL of a tender-announcement list page.
 *
 * @param {number} pageIndex - 1-based page number.
 * @returns {string} `moreinfo5dc.html` under the base URL for page 1,
 *   `<pageIndex>.html` under the base URL for every other page.
 */
function getBidAnnouncePageUrl(pageIndex) {
  const fileName = pageIndex === 1 ? 'moreinfo5dc.html' : `${pageIndex}.html`;
  return `${BID_ANNOUNCE_BASE_URL}${fileName}`;
}
/**
 * Parse a tender-announcement list page.
 *
 * Each `li.ewb-info-item2` row yields one entry when its `onclick` attribute
 * contains a `window.open('...')` target and both a title and a YYYY-MM-DD
 * date can be read from its `.ewb-info-num2` cells (first cell: title, last
 * cell: date). Site-relative targets are prefixed with the site origin.
 *
 * @param {string} html - Raw HTML of the list page.
 * @returns {Array<{title: string, date: string, href: string, estimatedAmount: null}>}
 */
function parseBidAnnounceList(html) {
  const $ = cheerio.load(html);
  const announcements = [];

  $('li.ewb-info-item2').each((_, element) => {
    const $item = $(element);
    const openCall = ($item.attr('onclick') || '').match(/window\.open\(['"]([^'"]+)['"]\)/);
    if (!openCall) return;

    const target = openCall[1];
    const href = target.startsWith('/') ? `https://njggzy.nanjing.gov.cn${target}` : target;

    const $cells = $item.find('.ewb-info-num2');
    const $titleP = $cells.first().find('p');
    const title = $titleP.attr('title') || $titleP.text().trim();

    const dateFound = $cells.last().find('p').text().trim().match(/\d{4}-\d{2}-\d{2}/);
    const date = dateFound ? dateFound[0] : '';

    if (title && date) {
      announcements.push({ title, date, href, estimatedAmount: null });
    }
  });

  return announcements;
}
/**
 * Fetch a tender-announcement detail page and pull selected fields out of the
 * page's body text with regular expressions.
 *
 * @param {string} url - Detail page URL.
 * @returns {Promise<object>} `{ estimatedAmount, bidCode, tenderee, duration, url }`
 *   with null for every field that was not found; on any failure the error is
 *   logged and `{ estimatedAmount: null, url }` is returned instead.
 */
async function fetchBidAnnounceDetail(url) {
  try {
    const $ = cheerio.load(await fetchHtml(url));
    const pageText = $('body').text();
    // First capture group of `pattern` in the page text, or null when it does not match.
    const capture = (pattern) => {
      const found = pageText.match(pattern);
      return found ? found[1] : null;
    };

    const amountText = capture(/合同估算价[:]\s*([\d,]+\.?\d*)\s*元/);
    const estimatedAmount = amountText === null
      ? null
      : parseFloat(amountText.replace(/,/g, ''));

    const bidCode = capture(/标段编码[:]\s*([A-Za-z0-9\-]+)/);
    const tenderee = capture(/招标人[为是][:]?\s*([^\s,,。]+)/);

    const durationText = capture(/计划工期[:]\s*(\d+)\s*日历天/);
    const duration = durationText === null ? null : parseInt(durationText, 10);

    return { estimatedAmount, bidCode, tenderee, duration, url };
  } catch (error) {
    console.error(`获取招标详情失败 ${url}: ${error.message}`);
    return { estimatedAmount: null, url };
  }
}
/**
 * Collect tender announcements whose date lies inside [startDate, endDate],
 * then enrich each one with fields from its detail page.
 *
 * Paging stops when a page is empty, when a request or parse fails, when every
 * item on a page is older than `startDate`, or after `maxPages` pages (500 ms
 * pause between pages). Detail pages are fetched one at a time with a 300 ms
 * pause in between.
 *
 * @param {string} [startDate] - Lower bound; falsy means unbounded.
 * @param {string} [endDate] - Upper bound; falsy means unbounded.
 * @param {number} [maxPages=20] - Maximum number of list pages to request.
 * @returns {Promise<Array<object>>} Matching announcements with
 *   `estimatedAmount`, `bidCode`, `tenderee` and `duration` filled in from the detail page.
 */
async function fetchBidAnnounceByDateRange(startDate, endDate, maxPages = 20) {
  const collected = [];
  let page = 1;
  let keepPaging = true;
  console.log(`开始采集招标公告: ${startDate || '不限'}${endDate || '不限'}`);

  while (keepPaging && page <= maxPages) {
    const listUrl = getBidAnnouncePageUrl(page);
    console.log(`正在采集招标公告第 ${page} 页: ${listUrl}`);
    try {
      const items = parseBidAnnounceList(await fetchHtml(listUrl));
      if (items.length === 0) {
        console.log(`${page} 页没有数据,停止采集`);
        break;
      }

      let matchedCount = 0;
      let olderCount = 0;
      for (const item of items) {
        if (isDateInRange(item.date, startDate, endDate)) {
          collected.push(item);
          matchedCount += 1;
        } else if (startDate && new Date(item.date) < new Date(startDate)) {
          olderCount += 1;
        }
      }

      // Only when every single item predates the start date is it safe to stop paging.
      if (olderCount === items.length) {
        console.log(`${page} 页所有项目都早于起始日期,停止采集`);
        keepPaging = false;
      }
      console.log(`${page} 页找到 ${items.length} 条,符合条件 ${matchedCount > 0 ? '有' : '无'}`);

      page += 1;
      if (keepPaging && page <= maxPages) {
        await new Promise((resolve) => setTimeout(resolve, 500));
      }
    } catch (err) {
      console.error(`采集第 ${page} 页失败: ${err.message}`);
      break;
    }
  }

  console.log(`总共采集了 ${page - 1} 页,找到 ${collected.length} 条符合条件的招标公告`);

  // Enrich every collected announcement from its detail page.
  if (collected.length > 0) {
    console.log(`开始获取 ${collected.length} 条招标公告的详情...`);
    const lastIndex = collected.length - 1;
    for (const [index, item] of collected.entries()) {
      console.log(`获取详情 ${index + 1}/${collected.length}: ${item.title.substring(0, 30)}...`);
      const detail = await fetchBidAnnounceDetail(item.href);
      item.estimatedAmount = detail.estimatedAmount;
      item.bidCode = detail.bidCode;
      item.tenderee = detail.tenderee;
      item.duration = detail.duration;
      if (index < lastIndex) {
        await new Promise((resolve) => setTimeout(resolve, 300));
      }
    }
    console.log('招标公告详情获取完成');
  }

  return collected;
}
// 定时任务执行函数
async function executeScheduledTask(config) {
try {
console.log('========================================');
console.log('定时任务开始执行(综合采集)');
console.log('定时任务开始执行');
console.log('执行时间:', new Date().toLocaleString('zh-CN'));
console.log('========================================');
const timeRange = config.scheduler.timeRange || 'thisMonth';
const { startDate, endDate } = getDateRangeByType(timeRange);
const winningThreshold = config.scheduler.winningThreshold !== undefined ? config.scheduler.winningThreshold : 10000; // 中标阈值默认1亿(10000万元)
const bidThreshold = config.scheduler.bidThreshold !== undefined ? config.scheduler.bidThreshold : 0; // 招标阈值默认0(不筛选)
// 获取所有已启用的抓取来源
const scrapers = (config.scrapers || []).filter(s => s.enabled);
const timeRangeNames = {
'today': '今日',
'thisWeek': '本周',
'thisMonth': '本月'
};
console.log(`采集时间段: ${timeRangeNames[timeRange] || '本月'}`);
console.log(`采集时间范围: ${startDate}${endDate}`);
console.log(`中标金额阈值: ${winningThreshold}万元 (${(winningThreshold / 10000).toFixed(2)}亿元)`);
console.log(`招标金额阈值: ${bidThreshold}万元 ${bidThreshold === 0 ? '(不筛选)' : `(${(bidThreshold / 10000).toFixed(2)}亿元)`}`);
// ========== 1. 采集中标公示 ==========
console.log('\n========== 采集中标公示 ==========');
const winningItems = await fetchListByDateRange(startDate, endDate, 50);
// 筛选大于阈值的中标项目
const winningFiltered = winningItems.filter((item) => {
return item.winningBid && item.winningBid.amount > winningThreshold;
});
const winningTotal = winningFiltered.reduce(
(sum, item) => sum + (item.winningBid?.amount || 0),
0
);
console.log(`中标公示: 采集 ${winningItems.length} 条,符合阈值 ${winningFiltered.length}`);
// 生成中标报告
const winningReport = {
summary: {
total_count: winningItems.length,
filtered_count: winningFiltered.length,
threshold: `${winningThreshold}万元`,
total_amount: `${winningTotal.toFixed(2)}万元`,
generated_at: new Date().toISOString(),
date_range: { startDate, endDate },
},
projects: winningFiltered.map((item) => ({
bidNo: item.bidNo,
title: item.title,
bidName: item.bidName,
date: item.date,
winningBid: item.winningBid,
url: item.href,
})),
};
// ========== 2. 采集招标公告 ==========
console.log('\n========== 采集招标公告 ==========');
const bidItems = await fetchBidAnnounceByDateRange(startDate, endDate, 20);
// 筛选招标项目根据阈值筛选阈值为0时不筛选只要求有金额
const bidFiltered = bidItems.filter(item => {
if (!item.estimatedAmount) return false;
if (bidThreshold === 0) return true; // 阈值为0时不筛选
return item.estimatedAmount / 10000 > bidThreshold; // 估算价是元,阈值是万元,需要转换
});
const bidTotal = bidFiltered.reduce(
(sum, item) => sum + (item.estimatedAmount || 0),
0
);
console.log(`招标公告: 采集 ${bidItems.length} 条,有金额 ${bidFiltered.length}`);
// 生成招标报告
const bidReport = {
summary: {
total_count: bidItems.length,
filtered_count: bidFiltered.length,
has_amount_count: bidFiltered.length,
threshold: bidThreshold === 0 ? '无' : `${bidThreshold}万元`,
total_amount: `${(bidTotal / 10000).toFixed(2)}万元`,
total_amount_yuan: bidTotal,
generated_at: new Date().toISOString(),
date_range: { startDate, endDate },
report_type: '招标公告'
},
projects: bidFiltered.map((item) => ({
title: item.title,
bidCode: item.bidCode,
tenderee: item.tenderee,
date: item.date,
duration: item.duration,
estimatedAmount: item.estimatedAmount ? {
amount: item.estimatedAmount,
amountWan: (item.estimatedAmount / 10000).toFixed(2),
unit: '元'
} : null,
url: item.href,
})),
};
// ========== 3. 检查是否有数据需要发送 ==========
if (winningFiltered.length === 0 && bidFiltered.length === 0) {
console.log('\n========================================');
console.log('暂无符合条件的项目,不发送邮件');
console.log('========================================');
if (scrapers.length === 0) {
console.log('没有已启用的抓取来源,跳过');
return;
}
// ========== 4. 发送综合邮件 ==========
console.log('\n========================================');
console.log('正在发送综合报告邮件...');
const emailConfig = config.email;
console.log(`${scrapers.length} 个已启用的抓取来源`);
const result = await sendCombinedReportEmail(emailConfig, winningReport, bidReport);
// 逐个运行抓取任务
const results = [];
for (const scraper of scrapers) {
try {
console.log(`\n---------- 抓取: ${scraper.city} - ${scraper.section} ${scraper.type} ----------`);
const r = await runScraper(scraper);
results.push(r);
console.log(`✓ 抓取成功`);
} catch (err) {
console.error(`✗ 抓取失败: ${err.message}`);
const errRecord = {
scraperId: scraper.id,
city: scraper.city,
section: scraper.section,
subsection: scraper.subsection,
type: scraper.type,
url: scraper.url,
scrapedAt: new Date().toISOString(),
error: err.message,
data: null,
};
appendResult(errRecord);
results.push(errRecord);
}
}
const successCount = results.filter(r => !r.error).length;
const failCount = results.filter(r => r.error).length;
console.log(`\n========== 抓取完成 ==========`);
console.log(`成功: ${successCount} 条,失败: ${failCount}`);
// 检查是否需要发送邮件
if (successCount === 0) {
console.log('没有成功的抓取结果,不发送邮件');
return;
}
// 发送邮件报告
if (config.email?.smtpHost && config.email?.smtpUser) {
console.log('\n正在发送抓取结果邮件...');
try {
const emailResult = await sendScraperResultsEmail(config.email, results);
console.log('邮件发送成功! MessageId:', emailResult.messageId);
} catch (emailErr) {
console.error('邮件发送失败:', emailErr.message);
}
} else {
console.log('邮件配置不完整,跳过邮件发送');
}
console.log('邮件发送成功!');
console.log('收件人:', emailConfig.recipients);
console.log('MessageId:', result.messageId);
console.log(`内容: 中标公示 ${winningFiltered.length} 条,招标公告 ${bidFiltered.length}`);
console.log('========================================');
console.log('定时任务执行完成');
console.log('========================================');
} catch (error) {
@@ -511,96 +202,60 @@ async function executeScheduledTask(config) {
// The currently scheduled cron task (assigned from cron.schedule), or null when none is active.
let currentScheduledTask = null;
// 初始化定时任务
export function initScheduler() {
const config = loadConfig();
if (!config) {
console.error('无法启动定时任务: 配置文件加载失败');
return;
}
if (!config.scheduler || !config.scheduler.enabled) {
console.log('定时任务已禁用');
return;
}
if (!config.email || !config.email.smtpHost || !config.email.smtpUser) {
console.error('无法启动定时任务: 邮件配置不完整');
console.error('请在 config.json 中配置邮件信息');
return;
}
if (!config) { console.error('无法启动定时任务: 配置文件加载失败'); return; }
if (!config.scheduler?.enabled) { console.log('定时任务已禁用'); return; }
const cronTime = config.scheduler.cronTime || '0 9 * * *';
const enabledCount = (config.scrapers || []).filter(s => s.enabled).length;
console.log('========================================');
console.log('定时任务已启动');
console.log('执行计划:', cronTime);
console.log('中标阈值:', config.scheduler.winningThreshold, '万元');
console.log('招标阈值:', config.scheduler.bidThreshold, '万元', config.scheduler.bidThreshold === 0 ? '(不筛选)' : '');
console.log('收件人:', config.email.recipients);
console.log('定时任务已启动,执行计划:', cronTime);
console.log(`已启用的抓取来源: ${enabledCount}`);
if (config.email?.recipients) console.log('收件人:', config.email.recipients);
console.log('========================================');
// 如果已有任务在运行,先停止
if (currentScheduledTask) {
currentScheduledTask.stop();
console.log('已停止旧的定时任务');
}
if (currentScheduledTask) { currentScheduledTask.stop(); }
// 创建定时任务
currentScheduledTask = cron.schedule(cronTime, () => {
executeScheduledTask(config);
}, {
timezone: 'Asia/Shanghai'
});
// 每次执行时重新加载配置,确保使用最新的 scrapers
const latestConfig = loadConfig();
if (latestConfig) {
executeScheduledTask(latestConfig);
}
}, { timezone: 'Asia/Shanghai' });
}
/**
 * Stop the active scheduled task (if any) and start over via initScheduler(),
 * which re-reads the configuration.
 */
export function reloadScheduler() {
  console.log('重新加载定时任务配置...');
  const activeTask = currentScheduledTask;
  if (activeTask) {
    activeTask.stop();
    currentScheduledTask = null;
    console.log('已停止当前定时任务');
  }
  initScheduler();
}
/**
 * Stop the active scheduled task.
 *
 * @returns {boolean} True when a task was running and has been stopped,
 *   false when there was nothing to stop.
 */
export function stopScheduler() {
  if (!currentScheduledTask) {
    return false;
  }
  currentScheduledTask.stop();
  currentScheduledTask = null;
  console.log('定时任务已停止');
  return true;
}
/**
 * Report the scheduler state together with a summary of the scheduler
 * settings read from the configuration file.
 *
 * @returns {{isRunning: boolean, enabledScrapers: number, config: (object|null)}}
 *   `config` is null when the configuration cannot be loaded; otherwise missing
 *   settings are replaced by their defaults.
 */
export function getSchedulerStatus() {
  const config = loadConfig();
  const enabledScrapers = (config?.scrapers || []).filter((s) => s.enabled).length;

  // Fall back only when the setting is absent (undefined); null and 0 are kept as-is.
  const definedOr = (value, fallback) => (value !== undefined ? value : fallback);

  let summary = null;
  if (config) {
    const scheduler = config.scheduler;
    summary = {
      enabled: scheduler?.enabled || false,
      cronTime: scheduler?.cronTime || '0 9 * * *',
      winningThreshold: definedOr(scheduler?.winningThreshold, 10000),
      bidThreshold: definedOr(scheduler?.bidThreshold, 0),
      timeRange: scheduler?.timeRange || 'thisMonth',
      description: scheduler?.description || '',
    };
  }

  return {
    isRunning: currentScheduledTask !== null,
    enabledScrapers,
    config: summary,
  };
}
/**
 * Run the scheduled task immediately (manual trigger, e.g. for testing).
 *
 * @returns {Promise<void>} Resolves when the task run has finished.
 * @throws {Error} When the configuration file cannot be loaded.
 */
export async function runTaskNow() {
  const loadedConfig = loadConfig();
  if (!loadedConfig) {
    throw new Error('配置文件加载失败');
  }
  await executeScheduledTask(loadedConfig);
}

File diff suppressed because it is too large Load Diff