Files
tool-node/src/scheduler.js
zhaojunlong b4afc1ce5a ```
feat(scheduler): 添加定时任务功能并集成前端配置界面

- 引入 node-cron 依赖以支持定时任务调度
- 新增定时任务相关 API 接口:获取配置、更新配置、查询状态、手动触发任务
- 前端新增“定时任务”标签页,支持 Cron 表达式配置与友好时间展示
- 支持通过 Web 界面启用/禁用定时任务、设置执行计划和金额阈值
- 定时任务可自动采集数据并发送邮件报告,无需重启服务即可生效新配置
- 优化配置保存逻辑,避免敏感信息泄露
```
2025-12-15 15:22:42 +08:00

632 lines
17 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import cron from 'node-cron';
import { readFileSync } from 'fs';
import { fileURLToPath } from 'url';
import { dirname, join } from 'path';
import axios from 'axios';
import * as cheerio from 'cheerio';
import iconv from 'iconv-lite';
import { sendReportEmail } from './emailService.js';
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
/**
 * Read and parse `config.json` from the directory above this module.
 *
 * @returns {object|null} The parsed configuration, or `null` when the file
 *   cannot be read or does not contain valid JSON (the failure is logged).
 */
function loadConfig() {
  let parsed = null;
  try {
    const location = join(__dirname, '..', 'config.json');
    parsed = JSON.parse(readFileSync(location, 'utf-8'));
  } catch (error) {
    console.error('加载配置文件失败:', error.message);
    console.error('请确保 config.json 文件存在并配置正确');
  }
  return parsed;
}
/**
 * Compute the inclusive date range for a named reporting period.
 *
 * The end of the range is always the reference day itself; only the start
 * depends on `timeRange`.
 *
 * NOTE(review): dates are derived from the process's local timezone, while
 * the cron schedule in this file is pinned to Asia/Shanghai — confirm the
 * host runs in that zone.
 *
 * @param {string} timeRange - `'today'`, `'thisWeek'` (weeks start on Monday)
 *   or `'thisMonth'`. Any other value is treated as `'thisMonth'`.
 * @param {Date} [now=new Date()] - Reference moment. Exposed so callers and
 *   tests can pin the clock; it is never mutated.
 * @returns {{startDate: string, endDate: string}} `YYYY-MM-DD` strings.
 */
function getDateRangeByType(timeRange, now = new Date()) {
  const pad = (value) => String(value).padStart(2, '0');
  const formatDay = (date) =>
    `${date.getFullYear()}-${pad(date.getMonth() + 1)}-${pad(date.getDate())}`;

  const endDate = formatDay(now);
  let startDate;

  switch (timeRange) {
    case 'today':
      startDate = endDate;
      break;
    case 'thisWeek': {
      // getDay() is 0 for Sunday; shift so that Monday counts as day 0.
      const daysSinceMonday = (now.getDay() + 6) % 7;
      const monday = new Date(now);
      // setDate handles underflow, so this rolls back across month/year ends.
      monday.setDate(now.getDate() - daysSinceMonday);
      startDate = formatDay(monday);
      break;
    }
    case 'thisMonth':
    default:
      startDate = `${now.getFullYear()}-${pad(now.getMonth() + 1)}-01`;
      break;
  }

  return { startDate, endDate };
}
/**
 * Date range from the first day of the current month through today.
 * Kept as a thin alias for older call sites.
 *
 * @returns {{startDate: string, endDate: string}} Same shape as
 *   `getDateRangeByType('thisMonth')`.
 */
function getCurrentMonthDateRange() {
  const monthRange = getDateRangeByType('thisMonth');
  return monthRange;
}
// Scraper helpers copied from server.js.
// Listing page that is scraped; it is the default base for paginated list
// URLs and the base against which relative announcement links are resolved.
const BASE_URL = 'https://gjzx.nanjing.gov.cn/gggs/';
// Shared axios client. Response bodies are requested as raw bytes
// (arraybuffer) so they can be decoded with an explicit charset afterwards;
// requests time out after 10 000 ms and carry a fixed User-Agent header.
const http = axios.create({
responseType: 'arraybuffer',
timeout: 10000,
headers: {
'User-Agent': 'Mozilla/5.0 (compatible; gjzx-scraper/1.0)',
},
});
/**
 * Derive a decoder name from a `Content-Type` header value.
 *
 * Handles both bare and quoted charset parameters (`charset=gbk`,
 * `charset="UTF-8"`); quotes are stripped so the result is usable as an
 * encoding name. Any charset containing `gb` (gb2312, gbk, gb18030, ...)
 * is mapped to `gbk`.
 *
 * @param {string} [contentType=''] - Raw header value; `null`/`undefined`
 *   are treated as "no charset declared".
 * @returns {string} Lower-cased charset name, `'utf-8'` when none is declared.
 */
function pickEncoding(contentType = '') {
  // Tolerate whitespace around '=' and optional single/double quotes.
  const match = /charset\s*=\s*["']?([^;"'\s]+)/i.exec(contentType ?? '');
  if (!match) return 'utf-8';
  const charset = match[1].toLowerCase();
  if (charset.includes('gb')) return 'gbk';
  return charset;
}
/**
 * Download a page and decode it to a string using the charset announced in
 * the response's `content-type` header (UTF-8 when none is usable).
 *
 * @param {string} url - Address to fetch with the shared HTTP client.
 * @returns {Promise<string>} Decoded document text.
 */
async function fetchHtml(url) {
  const { data, headers } = await http.get(url);
  const charset = pickEncoding(headers['content-type']) || 'utf-8';
  return iconv.decode(data, charset);
}
/**
 * Build the URL of a paginated listing page.
 *
 * @param {number} pageIndex - Zero-based page number. Page 0 is the base URL
 *   itself; later pages live at `<base>/index_<n>.html`.
 * @param {string} [baseUrl=BASE_URL] - Listing root; one trailing slash is
 *   dropped before the page file name is appended.
 * @returns {string} URL of the requested page.
 */
function getPageUrl(pageIndex, baseUrl = BASE_URL) {
  return pageIndex === 0
    ? baseUrl
    : `${baseUrl.replace(/\/$/, '')}/index_${pageIndex}.html`;
}
/**
 * Extract announcement entries from a listing page.
 *
 * A table row is accepted when its first cell holds a link, its second cell
 * holds a `YYYY-MM-DD` date, the title is at least 5 characters long and the
 * link is not a bare directory reference (`./` or `../`).
 *
 * @param {string} html - Listing page markup.
 * @returns {Array<{title: string, href: string, date: string}>} Entries in
 *   document order; `href` is resolved against `BASE_URL`.
 */
function parseList(html) {
  const $ = cheerio.load(html);
  const datePattern = /^\d{4}-\d{2}-\d{2}$/;
  const entries = [];

  $('table tr').each((_, tr) => {
    const $tr = $(tr);
    const anchor = $tr.find('td:first-child a').first();
    const dateTd = $tr.find('td:nth-child(2)');
    if (!anchor.length || !dateTd.length) return;

    const title = anchor.attr('title') || anchor.text().trim();
    const rawHref = anchor.attr('href') || '';
    const date = dateTd.text().trim();

    if (!rawHref || !title || title.length < 5) return;
    if (rawHref === './' || rawHref === '../') return;
    if (!datePattern.test(date)) return;

    let href;
    try {
      href = new URL(rawHref, BASE_URL).toString();
    } catch (err) {
      // Unresolvable link: skip this row and keep iterating.
      return;
    }
    entries.push({ title, href, date });
  });

  return entries;
}
/**
 * Check whether a date string falls inside an inclusive range.
 *
 * @param {string} dateStr - Date to test; empty or unparsable values are
 *   never in range.
 * @param {string} [startDate] - Lower bound; a falsy value means unbounded.
 * @param {string} [endDate] - Upper bound; a falsy value means unbounded.
 * @returns {boolean} `true` when `dateStr` is a valid date within the bounds.
 */
function isDateInRange(dateStr, startDate, endDate) {
  if (!dateStr) return false;

  const target = new Date(dateStr);
  if (isNaN(target.getTime())) return false;

  const beforeStart = Boolean(startDate) && target < new Date(startDate);
  const afterEnd = Boolean(endDate) && target > new Date(endDate);
  return !(beforeStart || afterEnd);
}
/**
 * Walk the paginated listing and collect every entry whose date lies in the
 * given range.
 *
 * Paging stops at the first of: `maxPages` reached, a page with no entries,
 * a page whose entries are all older than `startDate`, or a fetch/parse
 * failure (which is logged; entries gathered so far are still returned).
 * A 500 ms pause is inserted between page requests.
 *
 * @param {string} [startDate] - Inclusive lower bound (`YYYY-MM-DD`); falsy
 *   means unbounded and disables the "older than start" early stop.
 * @param {string} [endDate] - Inclusive upper bound; falsy means unbounded.
 * @param {number} [maxPages=23] - Upper limit on pages requested.
 * @returns {Promise<Array<{title: string, href: string, date: string}>>}
 *   Matching entries in listing order.
 */
async function fetchListByDateRange(startDate, endDate, maxPages = 23) {
  const allItems = [];
  // Parse the lower bound once instead of once per entry.
  const rangeStart = startDate ? new Date(startDate) : null;
  let shouldContinue = true;
  let pageIndex = 0;

  console.log(`开始按时间范围采集: ${startDate || '不限'} ~ ${endDate || '不限'}`);

  while (shouldContinue && pageIndex < maxPages) {
    const pageUrl = getPageUrl(pageIndex);
    console.log(`正在采集第 ${pageIndex + 1} 页: ${pageUrl}`);
    try {
      const html = await fetchHtml(pageUrl);
      const items = parseList(html);
      if (items.length === 0) {
        console.log(`${pageIndex + 1} 页没有数据,停止采集`);
        break;
      }

      const matched = items.filter((item) => isDateInRange(item.date, startDate, endDate));
      allItems.push(...matched);
      const hasItemsInRange = matched.length > 0;

      // Stop once a whole page predates the range: later pages hold nothing
      // newer, provided the listing is sorted newest-first (assumed, not
      // checked here).
      const allItemsBeforeRange =
        rangeStart !== null && items.every((item) => new Date(item.date) < rangeStart);
      if (allItemsBeforeRange) {
        console.log(`${pageIndex + 1} 页所有项目都早于起始日期,停止采集`);
        shouldContinue = false;
      }

      console.log(`${pageIndex + 1} 页找到 ${items.length} 条,符合条件 ${hasItemsInRange ? '有' : '无'}`);
      pageIndex++;

      if (shouldContinue && pageIndex < maxPages) {
        // Throttle requests to the remote site.
        await new Promise((resolve) => setTimeout(resolve, 500));
      }
    } catch (err) {
      console.error(`采集第 ${pageIndex + 1} 页失败: ${err.message}`);
      break;
    }
  }

  console.log(`总共采集了 ${pageIndex} 页,找到 ${allItems.length} 条符合条件的公告`);
  return allItems;
}
/**
 * Pull title, publish time, body text and budget out of an announcement
 * page (HTML only). Mirrors the detail parser in server.js.
 *
 * @param {string} html - Announcement page markup.
 * @returns {{title: string, publishTime: string, content: string, budget: (object|null)}}
 *   `publishTime` is `''` when no timestamp is found; `budget` is whatever
 *   `extractBudget` yields for `content`.
 */
function parseDetail(html) {
  const $ = cheerio.load(html);

  // Title: first non-empty candidate wins.
  const title =
    $('.title18').text().trim() ||
    $('.article-info h1').text().trim() ||
    $('h1').first().text().trim();

  // Publish time: prefer the cell mentioning both department and time
  // (timestamp with seconds), then fall back to a minute-precision match.
  const publishCell = $('td:contains("发布部门")').filter((_, el) => $(el).text().includes('发布时间'));
  const fullStamp = publishCell.text().trim().match(/(\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2})/);
  let publishTime = fullStamp ? fullStamp[1] : '';
  if (!publishTime) {
    const infoText = $('.info-sources').text() || $('body').text();
    const shortStamp = infoText.match(/(\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2})/);
    publishTime = shortStamp ? shortStamp[1] : '';
  }

  // Content: the longest trimmed text among the first match of each selector.
  // NOTE(review): 'body' presumably contains every other candidate, so it
  // will usually win — confirm whether "most specific" was intended instead.
  const content = ['.zhenwen td', '.con', '.article-content', '.ewb-article-content', 'body'].reduce(
    (longest, selector) => {
      const node = $(selector).first();
      if (node.length === 0) return longest;
      const text = node.text().trim();
      return text.length > longest.length ? text : longest;
    },
    ''
  );

  return {
    title,
    publishTime,
    content,
    budget: extractBudget(content),
  };
}
/**
 * Find the first monetary amount in announcement text and normalise it to
 * units of 万元 (10 000 yuan).
 *
 * Patterns are tried in priority order and the first one that yields a
 * plausible amount (0.01 – 100 000 000 万元) wins:
 *   1. currency marker + number + 万元
 *   2. parenthesised yen amount, e.g. (¥1,234,567.00)  → yuan
 *   3. bare number + 万元
 *   4. currency marker + number + 元                    → yuan
 *   5. bare number + 元 (not followed by 整)            → yuan
 *
 * Both half-width and full-width forms of the yen sign, parentheses and
 * thousands separator are recognised; they are written as escapes so the
 * distinction survives editors that normalise look-alike characters.
 *
 * @param {string} content - Text to scan. Non-string or empty input yields `null`.
 * @returns {{amount: number, unit: string, text: string, originalUnit: string}|null}
 *   `amount` in 万元, `text` the matched snippet, `originalUnit` the unit as
 *   written in the source (`'元'` or `'万元'`); `null` when nothing matches.
 */
function extractBudget(content) {
  if (typeof content !== 'string' || content === '') return null;

  // Re-join numbers that were split across a line break (common in PDF text).
  const cleanedContent = content.replace(/(\d)\s*[\n\r]\s*(?=\d)/g, '$1');

  // \u00A5 = ¥, \uFFE5 = full-width yen, \uFF08/\uFF09 = full-width parens,
  // \uFF0C = full-width comma. `divider` converts the matched unit to 万元.
  const patterns = [
    { regex: /(?:[\u00A5\uFFE5]|人民币)\s*([\d,\uFF0C]+(?:\.\d+)?)\s*万元/, divider: 1 },
    { regex: /[(\uFF08][\u00A5\uFFE5]([\d,\uFF0C]+(?:\.\d+)?)[)\uFF09]/, divider: 10000 },
    { regex: /([\d,\uFF0C]+(?:\.\d+)?)\s*万元/, divider: 1 },
    { regex: /(?:[\u00A5\uFFE5]|人民币)\s*([\d,\uFF0C]+(?:\.\d+)?)\s*元/, divider: 10000 },
    { regex: /([\d,\uFF0C]+(?:\.\d+)?)\s*元(?!整)/, divider: 10000 },
  ];

  for (const { regex, divider } of patterns) {
    const match = cleanedContent.match(regex);
    if (!match) continue;

    const amount = Number.parseFloat(match[1].replace(/[,\uFF0C]/g, '')) / divider;
    // Reject unparsable or implausible values and fall through to the next
    // (lower-priority) pattern.
    if (Number.isNaN(amount) || amount < 0.01 || amount > 100000000) continue;

    return {
      amount,
      unit: '万元',
      text: match[0],
      originalUnit: divider === 1 ? '万元' : '元',
    };
  }

  return null;
}
/**
 * Ask the jszbtb.com bulletin API for the signed PDF URL behind a bulletin
 * detail page.
 *
 * Best effort: any failure (URL without a bulletin id, network error,
 * unparsable response, unsuccessful payload) resolves to `null`.
 *
 * @param {string} pageUrl - Detail page URL containing
 *   `bulletinDetails/<segment>/<hex id>` and optionally `bulletinType=<n>`
 *   (type defaults to `'1'`).
 * @returns {Promise<string|null>} The `signedPdfUrl` from the API, or `null`.
 */
async function fetchPdfUrlFromApi(pageUrl) {
  try {
    const idMatch = pageUrl.match(/bulletinDetails\/[^\/]+\/([a-f0-9]+)/i);
    if (!idMatch) {
      return null;
    }
    const typeMatch = pageUrl.match(/bulletinType=(\d+)/);
    const [, bulletinId] = idMatch;
    const bulletinType = typeMatch ? typeMatch[1] : '1';

    const apiUrl = `https://api.jszbtb.com/DataGatewayApi/PublishBulletin/BulletinType/${bulletinType}/ID/${bulletinId}`;
    const requestOptions = {
      headers: {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
        'Accept': 'application/json',
        'Referer': 'https://www.jszbcg.com/'
      },
      responseType: 'arraybuffer'
    };
    const response = await http.get(apiUrl, requestOptions);

    const payload = JSON.parse(iconv.decode(response.data, 'utf-8'));
    const signedUrl = payload.success && payload.data && payload.data.signedPdfUrl;
    return signedUrl ? signedUrl : null;
  } catch (err) {
    return null;
  }
}
/**
 * Locate the PDF embedded in a page through a viewer iframe
 * (`<iframe src="...viewer...?file=<encoded url>">`).
 *
 * Every iframe is inspected in document order and the first one carrying a
 * usable `file=` query parameter wins. Relative targets are resolved
 * against `pageUrl`. Iframes without `src`, without a `file` parameter, or
 * whose value cannot be decoded/resolved are skipped.
 *
 * @param {string} html - Page markup.
 * @param {string} pageUrl - URL the markup was loaded from.
 * @returns {string|null} Absolute PDF URL, or `null` when none is found.
 */
function extractPdfUrl(html, pageUrl) {
  const $ = cheerio.load(html);

  for (const frame of $('iframe').toArray()) {
    const src = $(frame).attr('src');
    const match = src ? src.match(/[?&]file=([^&]+)/) : null;
    if (!match) continue;

    try {
      // decodeURIComponent throws URIError on malformed escapes and
      // new URL throws on unresolvable input; both mean "not this iframe".
      const pdfUrl = decodeURIComponent(match[1]);
      if (pdfUrl.startsWith('http://') || pdfUrl.startsWith('https://')) {
        return pdfUrl;
      }
      return new URL(pdfUrl, pageUrl).toString();
    } catch (err) {
      continue;
    }
  }

  return null;
}
/**
 * Download a PDF and return its extracted text.
 *
 * `pdf-parse` is loaded lazily so the module is only required when a PDF
 * is actually processed. The parser is always destroyed, including when
 * text extraction fails.
 *
 * @param {string} pdfUrl - Absolute URL of the PDF (30 s download timeout).
 * @returns {Promise<string>} Text content reported by the parser.
 * @throws Propagates download, import and parsing errors to the caller.
 */
async function fetchPdfContent(pdfUrl) {
  const { PDFParse } = await import('pdf-parse');
  const response = await http.get(pdfUrl, {
    responseType: 'arraybuffer',
    timeout: 30000,
  });

  const parser = new PDFParse({ data: response.data });
  try {
    const result = await parser.getText();
    return result.text;
  } finally {
    // Release parser resources on success and on failure alike.
    await parser.destroy();
  }
}
/**
 * Parse an announcement page, preferring the text of its attached PDF when
 * one can be found and read.
 *
 * The PDF URL comes from the bulletin API for `jszbcg.com` pages and
 * otherwise (or when the API yields nothing) from a viewer iframe in the
 * markup. If no PDF is available or reading it fails, the HTML-derived
 * content and budget are used.
 *
 * @param {string} html - Announcement page markup.
 * @param {string} pageUrl - URL the markup was loaded from.
 * @returns {Promise<{title: string, publishTime: string, content: string,
 *   budget: (object|null), hasPdf: boolean, pdfUrl: (string|null)}>}
 *   `hasPdf` is true only when PDF text was extracted; `pdfUrl` is set only
 *   in that case.
 */
async function parseDetailEnhanced(html, pageUrl) {
  // The HTML is parsed once and serves as both metadata source and fallback.
  const basicInfo = parseDetail(html);

  let pdfUrl = null;
  if (pageUrl.includes('jszbcg.com')) {
    pdfUrl = await fetchPdfUrlFromApi(pageUrl);
  }
  if (!pdfUrl) {
    pdfUrl = extractPdfUrl(html, pageUrl);
  }

  let content = basicInfo.content;
  let pdfParsed = false;
  if (pdfUrl) {
    try {
      content = await fetchPdfContent(pdfUrl);
      pdfParsed = true;
    } catch (err) {
      // Keep the HTML content, but leave a trace of why the PDF was skipped.
      console.warn(`PDF解析失败, 回退到HTML内容: ${err.message}`);
    }
  }

  const budget = pdfParsed ? extractBudget(content) : basicInfo.budget;

  return {
    ...basicInfo,
    content,
    budget,
    hasPdf: pdfParsed,
    pdfUrl: pdfParsed ? pdfUrl : null,
  };
}
/**
 * Run one full collection cycle: gather announcements for the configured
 * period, fetch each one's details, keep those whose budget exceeds the
 * threshold and e-mail a summary report.
 *
 * No e-mail is sent when there are no announcements or none above the
 * threshold. Every error is caught and logged here, so the returned promise
 * never rejects.
 *
 * @param {object} config - Parsed `config.json`.
 * @param {object} [config.scheduler] - `timeRange` (`'today'`, `'thisWeek'`,
 *   `'thisMonth'`; default `'thisMonth'`) and `threshold` in 万元 (default
 *   100000; a value of 0 also falls back to the default).
 * @param {object} config.email - Passed through to `sendReportEmail`.
 * @returns {Promise<void>}
 */
async function executeScheduledTask(config) {
  try {
    console.log('========================================');
    console.log('定时任务开始执行');
    console.log('执行时间:', new Date().toLocaleString('zh-CN'));
    console.log('========================================');

    // A config without a scheduler section runs with defaults (manual runs).
    const schedulerConfig = config.scheduler ?? {};
    const timeRange = schedulerConfig.timeRange || 'thisMonth';
    const { startDate, endDate } = getDateRangeByType(timeRange);
    const threshold = schedulerConfig.threshold || 100000; // default 100000 万元 = 10 亿元

    const timeRangeNames = {
      'today': '今日',
      'thisWeek': '本周',
      'thisMonth': '本月'
    };
    // Unknown range names are handled as "this month", and labelled as such.
    const rangeLabel = timeRangeNames[timeRange] || '本月';

    console.log(`采集时间段: ${rangeLabel}`);
    console.log(`采集时间范围: ${startDate} ~ ${endDate}`);
    console.log(`金额阈值: ${threshold}万元 (${threshold / 10000}亿元)`);

    // Collect the listing.
    const items = await fetchListByDateRange(startDate, endDate, 23);
    if (items.length === 0) {
      console.log(`${rangeLabel}暂无公告数据`);
      return;
    }

    // Collect details, one announcement at a time.
    console.log('========================================');
    console.log(`开始采集 ${items.length} 条公告的详情...`);
    const results = [];
    for (const [index, item] of items.entries()) {
      try {
        console.log(`[${index + 1}/${items.length}] 正在采集: ${item.title}`);
        const html = await fetchHtml(item.href);
        const detail = await parseDetailEnhanced(html, item.href);
        results.push({
          ...item,
          detail,
        });
        // Throttle requests to the remote site.
        await new Promise((resolve) => setTimeout(resolve, 500));
      } catch (err) {
        // One failing announcement must not abort the whole run.
        console.error(`采集失败: ${err.message}`);
        results.push({
          ...item,
          detail: null,
          error: err.message,
        });
      }
    }

    // Keep only projects strictly above the threshold.
    const filtered = results.filter((item) => {
      return item.detail?.budget && item.detail.budget.amount > threshold;
    });
    console.log('========================================');
    console.log(`筛选结果: 找到 ${filtered.length} 个大于 ${threshold}万元 的项目`);
    if (filtered.length === 0) {
      console.log(`${rangeLabel}暂无符合条件的大额项目`);
      return;
    }

    // Total of the reported budgets (万元).
    const total = filtered.reduce(
      (sum, item) => sum + (item.detail.budget?.amount || 0),
      0
    );

    // Build the report payload.
    const report = {
      summary: {
        total_count: results.length,
        filtered_count: filtered.length,
        threshold: `${threshold}万元`,
        total_amount: `${total.toFixed(2)}万元`,
        generated_at: new Date().toISOString(),
        date_range: { startDate, endDate },
      },
      projects: filtered.map((item) => ({
        title: item.title,
        date: item.date,
        publish_time: item.detail.publishTime,
        budget: item.detail.budget,
        url: item.href,
      })),
    };

    // Send the e-mail.
    console.log('========================================');
    console.log('正在发送邮件报告...');
    const emailConfig = config.email;
    const result = await sendReportEmail(emailConfig, report);
    console.log('邮件发送成功!');
    console.log('收件人:', emailConfig.recipients);
    console.log('MessageId:', result.messageId);
    console.log('========================================');
    console.log('定时任务执行完成');
    console.log('========================================');
  } catch (error) {
    console.error('========================================');
    console.error('定时任务执行失败:', error.message);
    console.error(error.stack);
    console.error('========================================');
  }
}
// Handle of the cron task currently scheduled by this module, or null when
// none is scheduled. Written by initScheduler / reloadScheduler /
// stopScheduler and read by getSchedulerStatus.
let currentScheduledTask = null;
/**
 * Load the configuration and (re)start the cron-driven report task.
 *
 * Nothing is scheduled — and the reason is logged — when the config cannot
 * be loaded, the scheduler is disabled, the e-mail settings are incomplete,
 * or the cron expression is invalid. A previously scheduled task is stopped
 * only once the new settings have passed all of those checks.
 *
 * The task runs in the Asia/Shanghai timezone and uses the configuration
 * snapshot taken here; call `reloadScheduler()` to pick up later changes.
 *
 * @returns {void}
 */
export function initScheduler() {
  const config = loadConfig();
  if (!config) {
    console.error('无法启动定时任务: 配置文件加载失败');
    return;
  }
  if (!config.scheduler || !config.scheduler.enabled) {
    console.log('定时任务已禁用');
    return;
  }
  if (!config.email || !config.email.smtpHost || !config.email.smtpUser) {
    console.error('无法启动定时任务: 邮件配置不完整');
    console.error('请在 config.json 中配置邮件信息');
    return;
  }

  const cronTime = config.scheduler.cronTime || '0 9 * * *';
  // The expression can be edited through the web UI; reject bad input here
  // instead of letting cron.schedule throw into the caller.
  if (!cron.validate(cronTime)) {
    console.error('无法启动定时任务: Cron 表达式无效:', cronTime);
    return;
  }

  // Replace any task that is already scheduled.
  if (currentScheduledTask) {
    currentScheduledTask.stop();
    console.log('已停止旧的定时任务');
  }

  currentScheduledTask = cron.schedule(cronTime, () => {
    // executeScheduledTask handles its own errors, so the promise is not awaited.
    executeScheduledTask(config);
  }, {
    timezone: 'Asia/Shanghai'
  });

  console.log('========================================');
  console.log('定时任务已启动');
  console.log('执行计划:', cronTime);
  // Show the threshold the task will actually use (default when unset).
  console.log('金额阈值:', config.scheduler.threshold || 100000, '万元');
  console.log('收件人:', config.email.recipients);
  console.log('========================================');
}
/**
 * Stop the task that is currently scheduled (if any) and start again from a
 * freshly loaded configuration.
 *
 * @returns {void}
 */
export function reloadScheduler() {
  console.log('重新加载定时任务配置...');

  const running = currentScheduledTask;
  if (running) {
    running.stop();
    currentScheduledTask = null;
    console.log('已停止当前定时任务');
  }

  initScheduler();
}
/**
 * Stop the scheduled task, if one exists.
 *
 * @returns {boolean} `true` when a task was stopped, `false` when there was
 *   nothing to stop.
 */
export function stopScheduler() {
  if (!currentScheduledTask) {
    return false;
  }
  currentScheduledTask.stop();
  currentScheduledTask = null;
  console.log('定时任务已停止');
  return true;
}
/**
 * Report whether a task is scheduled, together with the scheduler settings
 * currently stored in the config file.
 *
 * @returns {{isRunning: boolean, config: ({enabled: boolean, cronTime: string,
 *   threshold: number}|null)}} `config` is `null` when the config file cannot
 *   be loaded; missing settings are reported with their defaults.
 */
export function getSchedulerStatus() {
  const loaded = loadConfig();

  let summary = null;
  if (loaded) {
    const scheduler = loaded.scheduler;
    summary = {
      enabled: scheduler?.enabled || false,
      cronTime: scheduler?.cronTime || '0 9 * * *',
      threshold: scheduler?.threshold || 100000,
    };
  }

  return {
    isRunning: currentScheduledTask !== null,
    config: summary,
  };
}
/**
 * Run the report task immediately with the configuration on disk
 * (intended for manual triggering and testing).
 *
 * @returns {Promise<void>} Resolves when the run has finished.
 * @throws {Error} When the configuration file cannot be loaded.
 */
export async function runTaskNow() {
  const config = loadConfig();
  if (config) {
    await executeScheduledTask(config);
    return;
  }
  throw new Error('配置文件加载失败');
}