From b4afc1ce5a15195c383ec28cc12bd9172ae26b6c Mon Sep 17 00:00:00 2001
From: zhaojunlong <5482498@qq.com>
Date: Mon, 15 Dec 2025 15:22:42 +0800
Subject: [PATCH] =?UTF-8?q?feat(scheduler):=20=E6=B7=BB=E5=8A=A0?=
=?UTF-8?q?=E5=AE=9A=E6=97=B6=E4=BB=BB=E5=8A=A1=E5=8A=9F=E8=83=BD=E5=B9=B6?=
=?UTF-8?q?=E9=9B=86=E6=88=90=E5=89=8D=E7=AB=AF=E9=85=8D=E7=BD=AE=E7=95=8C?=
=?UTF-8?q?=E9=9D=A2?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
- 引入 node-cron 依赖以支持定时任务调度
- 新增定时任务相关 API 接口:获取配置、更新配置、查询状态、手动触发任务
- 前端新增“定时任务”标签页,支持 Cron 表达式配置与友好时间展示
- 支持通过 Web 界面启用/禁用定时任务、设置执行计划和金额阈值
- 定时任务可自动采集数据并发送邮件报告,无需重启服务即可生效新配置
- 优化配置保存逻辑,避免敏感信息泄露
---
config.example.json | 21 ++
config.json | 16 +
node_modules/.package-lock.json | 9 +
package-lock.json | 10 +
package.json | 1 +
public/app.js | 298 ++++++++++++++-
public/index.html | 113 ++++++
src/scheduler.js | 631 ++++++++++++++++++++++++++++++++
src/server.js | 94 +++++
9 files changed, 1188 insertions(+), 5 deletions(-)
create mode 100644 config.example.json
create mode 100644 config.json
create mode 100644 src/scheduler.js
diff --git a/config.example.json b/config.example.json
new file mode 100644
index 0000000..bc04c4d
--- /dev/null
+++ b/config.example.json
@@ -0,0 +1,21 @@
+{
+ "scheduler": {
+ "enabled": true,
+ "cronTime": "0 9 * * *",
+ "threshold": 100000,
+ "description": "每天9点采集大于10亿的项目",
+ "timeRange": "thisMonth",
+ "pushRules": {
+ "urgentThreshold": 500000,
+ "urgentPush": false,
+ "summaryPush": true
+ }
+ },
+ "email": {
+ "smtpHost": "smtp.example.com",
+ "smtpPort": 587,
+ "smtpUser": "your-email@example.com",
+ "smtpPass": "your-password",
+ "recipients": "recipient1@example.com,recipient2@example.com"
+ }
+}
diff --git a/config.json b/config.json
new file mode 100644
index 0000000..731d3c6
--- /dev/null
+++ b/config.json
@@ -0,0 +1,16 @@
+{
+ "scheduler": {
+ "enabled": false,
+ "cronTime": "0 9 * * *",
+ "threshold": 100000,
+ "description": "每天9点采集大于10亿的项目",
+ "timeRange": "today"
+ },
+ "email": {
+ "smtpHost": "smtp.qq.com",
+ "smtpPort": 587,
+ "smtpUser": "1076597680@qq.com",
+ "smtpPass": "your-password",
+ "recipients": "5482498@qq.com"
+ }
+}
\ No newline at end of file
diff --git a/node_modules/.package-lock.json b/node_modules/.package-lock.json
index 96859aa..2606e5e 100644
--- a/node_modules/.package-lock.json
+++ b/node_modules/.package-lock.json
@@ -1011,6 +1011,15 @@
"node": ">= 0.6"
}
},
+ "node_modules/node-cron": {
+ "version": "4.2.1",
+ "resolved": "https://registry.npmmirror.com/node-cron/-/node-cron-4.2.1.tgz",
+ "integrity": "sha512-lgimEHPE/QDgFlywTd8yTR61ptugX3Qer29efeyWw2rv259HtGBNn1vZVmp8lB9uo9wC0t/AT4iGqXxia+CJFg==",
+ "license": "ISC",
+ "engines": {
+ "node": ">=6.0.0"
+ }
+ },
"node_modules/nodemailer": {
"version": "7.0.11",
"resolved": "https://registry.npmmirror.com/nodemailer/-/nodemailer-7.0.11.tgz",
diff --git a/package-lock.json b/package-lock.json
index a5988dd..7d35d59 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -14,6 +14,7 @@
"docx": "^9.5.1",
"express": "^5.2.1",
"iconv-lite": "^0.6.3",
+ "node-cron": "^4.2.1",
"nodemailer": "^7.0.11",
"pdf-parse": "^2.4.5"
}
@@ -1169,6 +1170,15 @@
"node": ">= 0.6"
}
},
+ "node_modules/node-cron": {
+ "version": "4.2.1",
+ "resolved": "https://registry.npmmirror.com/node-cron/-/node-cron-4.2.1.tgz",
+ "integrity": "sha512-lgimEHPE/QDgFlywTd8yTR61ptugX3Qer29efeyWw2rv259HtGBNn1vZVmp8lB9uo9wC0t/AT4iGqXxia+CJFg==",
+ "license": "ISC",
+ "engines": {
+ "node": ">=6.0.0"
+ }
+ },
"node_modules/nodemailer": {
"version": "7.0.11",
"resolved": "https://registry.npmmirror.com/nodemailer/-/nodemailer-7.0.11.tgz",
diff --git a/package.json b/package.json
index 9f895ed..8fd7ce1 100644
--- a/package.json
+++ b/package.json
@@ -14,6 +14,7 @@
"docx": "^9.5.1",
"express": "^5.2.1",
"iconv-lite": "^0.6.3",
+ "node-cron": "^4.2.1",
"nodemailer": "^7.0.11",
"pdf-parse": "^2.4.5"
}
diff --git a/public/app.js b/public/app.js
index 1d3733a..1b6041e 100644
--- a/public/app.js
+++ b/public/app.js
@@ -479,11 +479,6 @@ async function exportReport() {
// ========== 邮件功能 ==========
-// 页面加载时加载邮件配置
-document.addEventListener('DOMContentLoaded', function() {
- loadEmailConfig();
-});
-
// 保存邮件配置到localStorage
function saveEmailConfig() {
const config = {
@@ -670,3 +665,296 @@ function showEmailStatus(message, type) {
}, 3000);
}
}
+
+// ========== 定时任务功能 ==========
+
+/**
+ * Convert a cron expression into a short human-readable (Chinese) label.
+ *
+ * Known presets map to fixed labels. A plain daily "minute hour * * *"
+ * expression whose minute and hour are simple numbers is rendered as
+ * "每天H点" / "每天H点M分". Anything else (steps, lists, ranges, weekday or
+ * month restrictions) is returned unchanged, so the UI never shows a
+ * misleading text such as "每天*\/2点" for "0 *\/2 * * *".
+ *
+ * @param {string} cronTime - Five-field cron expression.
+ * @returns {string} Friendly label, the original expression when it cannot be
+ *   described, or an empty string when the input is not a string.
+ */
+function cronToFriendlyText(cronTime) {
+  if (typeof cronTime !== 'string') {
+    return '';
+  }
+
+  // A Map (not a plain object) so inputs like "constructor" cannot hit
+  // inherited Object.prototype properties.
+  const presetLabels = new Map([
+    ['0 9 * * *', '每天上午9点'],
+    ['0 6 * * *', '每天上午6点'],
+    ['0 12 * * *', '每天中午12点'],
+    ['0 18 * * *', '每天下午18点'],
+    ['0 9,18 * * *', '每天9点和18点'],
+    ['0 */6 * * *', '每6小时'],
+    ['0 */12 * * *', '每12小时'],
+    ['0 9 * * 1', '每周一上午9点'],
+    ['0 9 1 * *', '每月1日上午9点'],
+  ]);
+
+  // Collapse repeated whitespace so "0  9 * * *" still matches a preset.
+  const fields = cronTime.trim().split(/\s+/);
+  const presetLabel = presetLabels.get(fields.join(' '));
+  if (presetLabel !== undefined) {
+    return presetLabel;
+  }
+
+  const [minute, hour, dayOfMonth, month, dayOfWeek] = fields;
+  const isDaily =
+    fields.length === 5 && dayOfMonth === '*' && month === '*' && dayOfWeek === '*';
+  const isPlainNumber = (value) => /^\d{1,2}$/.test(value);
+
+  // Only describe "every day at H:M"; steps and lists are not a single time.
+  if (isDaily && isPlainNumber(minute) && isPlainNumber(hour)) {
+    return minute === '0' ? `每天${hour}点` : `每天${hour}点${minute}分`;
+  }
+
+  return cronTime;
+}
+
+/**
+ * Fetch the saved configuration from the server and populate the scheduler form.
+ *
+ * Fills the enable switch, cron input, threshold, description and time range,
+ * selects the matching cron preset (or switches to the custom hour/minute
+ * inputs), then refreshes the status panel. Failures are logged and reported
+ * through showSchedulerStatus.
+ */
+async function loadSchedulerConfig() {
+  const byId = (id) => document.getElementById(id);
+
+  try {
+    const res = await fetch(`${API_BASE}/config`);
+    const payload = await res.json();
+    if (!(payload.success && payload.data)) {
+      return;
+    }
+
+    const scheduler = payload.data.scheduler;
+    if (scheduler) {
+      const cronTime = scheduler.cronTime || '0 9 * * *';
+
+      byId('schedulerEnabled').checked = scheduler.enabled || false;
+      byId('schedulerCronInput').value = cronTime;
+      byId('schedulerThresholdInput').value = scheduler.threshold || 100000;
+      byId('schedulerDescription').value = scheduler.description || '';
+      byId('schedulerTimeRange').value = scheduler.timeRange || 'thisMonth';
+
+      // Map the stored expression back onto the preset drop-down.
+      const presetSelector = byId('schedulerCronPreset');
+      const customGroup = byId('customCronGroup');
+      const knownPresets = [
+        '0 9 * * *',
+        '0 6 * * *',
+        '0 12 * * *',
+        '0 18 * * *',
+        '0 9,18 * * *',
+        '0 */6 * * *',
+        '0 */12 * * *',
+        '0 9 * * 1',
+        '0 9 1 * *',
+      ];
+      const isPreset = knownPresets.includes(cronTime);
+
+      presetSelector.value = isPreset ? cronTime : 'custom';
+      customGroup.style.display = isPreset ? 'none' : 'block';
+
+      if (!isPreset) {
+        // Custom schedule: the first two fields are minute and hour.
+        const parts = cronTime.split(/\s+/);
+        if (parts.length >= 2) {
+          byId('customMinute').value = parts[0];
+          byId('customHour').value = parts[1];
+        }
+      }
+    }
+
+    await updateSchedulerStatus();
+  } catch (error) {
+    console.error('加载定时任务配置失败:', error);
+    showSchedulerStatus('加载配置失败: ' + error.message, 'error');
+  }
+}
+
+/**
+ * React to a change of the cron preset drop-down.
+ *
+ * "custom" reveals the hour/minute inputs and rebuilds the expression from
+ * them; any other option hides them and copies the preset into the cron input.
+ */
+function handleCronPresetChange() {
+  const selected = document.getElementById('schedulerCronPreset').value;
+  const customGroup = document.getElementById('customCronGroup');
+  const cronInput = document.getElementById('schedulerCronInput');
+  const isCustom = selected === 'custom';
+
+  customGroup.style.display = isCustom ? 'block' : 'none';
+
+  if (isCustom) {
+    updateCustomCron();
+    return;
+  }
+  cronInput.value = selected;
+}
+
+/**
+ * Rebuild the cron input from the custom hour and minute fields.
+ * The result has the form "minute hour * * *" (once a day at that time).
+ */
+function updateCustomCron() {
+  const [hour, minute] = ['customHour', 'customMinute'].map(
+    (id) => document.getElementById(id).value
+  );
+  document.getElementById('schedulerCronInput').value = `${minute} ${hour} * * *`;
+}
+
+// On page load: restore the e-mail and scheduler forms, then keep the cron
+// expression in sync with the custom hour/minute inputs (when they exist).
+document.addEventListener('DOMContentLoaded', () => {
+  loadEmailConfig();
+  loadSchedulerConfig();
+
+  for (const id of ['customHour', 'customMinute']) {
+    const input = document.getElementById(id);
+    if (input) {
+      input.addEventListener('change', updateCustomCron);
+    }
+  }
+});
+
+/**
+ * Refresh the scheduler status panel from GET {API_BASE}/scheduler/status.
+ *
+ * Shows whether a task is currently scheduled (green when running, red when
+ * not), the friendly schedule text and the threshold in both 万元 and 亿.
+ * Errors are only logged, so a failed refresh never breaks the calling flow.
+ */
+async function updateSchedulerStatus() {
+  try {
+    const response = await fetch(`${API_BASE}/scheduler/status`);
+    const data = await response.json();
+    if (!data.success || !data.data) {
+      return;
+    }
+
+    const status = data.data;
+
+    // Plain text plus an inline colour: no markup is needed here, so avoid
+    // innerHTML and actually apply the status colour.
+    const runningEl = document.getElementById('schedulerRunningStatus');
+    runningEl.textContent = status.isRunning ? '✓ 运行中' : '✗ 未运行';
+    runningEl.style.color = status.isRunning ? '#28a745' : '#dc3545';
+
+    if (status.config) {
+      const { cronTime, threshold } = status.config;
+      document.getElementById('schedulerCronTime').textContent = cronToFriendlyText(cronTime);
+      // The threshold is stored in 万元; 10000 万元 = 1 亿.
+      const thresholdBillion = (threshold / 10000).toFixed(1);
+      document.getElementById('schedulerThreshold').textContent =
+        `${threshold}万元 (${thresholdBillion}亿)`;
+    }
+  } catch (error) {
+    console.error('获取定时任务状态失败:', error);
+  }
+}
+
+/**
+ * Validate the scheduler form and POST it to {API_BASE}/config.
+ *
+ * The cron expression is trimmed and must have five fields; the threshold must
+ * be a non-negative integer (万元). The e-mail settings kept in localStorage
+ * are sent along only when they are complete. If they are incomplete and the
+ * user chooses to continue, the `email` section is left out of the request,
+ * as the confirmation dialog promises.
+ */
+async function saveSchedulerConfig() {
+  const schedulerConfig = {
+    enabled: document.getElementById('schedulerEnabled').checked,
+    // Send the trimmed value: it is the one that gets validated below.
+    cronTime: document.getElementById('schedulerCronInput').value.trim(),
+    threshold: Number.parseInt(document.getElementById('schedulerThresholdInput').value, 10),
+    description: document.getElementById('schedulerDescription').value,
+    timeRange: document.getElementById('schedulerTimeRange').value,
+  };
+
+  // Simple structural check of the cron expression.
+  if (schedulerConfig.cronTime.split(/\s+/).length !== 5) {
+    showSchedulerStatus('Cron表达式格式错误,应为5个部分(分 时 日 月 周)', 'error');
+    return;
+  }
+
+  // An empty or non-numeric input parses to NaN, which JSON would turn into null.
+  if (!Number.isFinite(schedulerConfig.threshold) || schedulerConfig.threshold < 0) {
+    showSchedulerStatus('金额阈值必须是不小于0的整数(单位:万元)', 'error');
+    return;
+  }
+
+  // The e-mail settings live in localStorage (see saveEmailConfig).
+  let emailConfig = null;
+  const emailConfigStr = localStorage.getItem('emailConfig');
+  if (emailConfigStr) {
+    try {
+      emailConfig = JSON.parse(emailConfigStr);
+    } catch (e) {
+      console.error('解析邮件配置失败:', e);
+    }
+  }
+
+  const emailReady = Boolean(emailConfig && emailConfig.smtpHost && emailConfig.smtpUser);
+  if (
+    !emailReady &&
+    !confirm('检测到邮件配置未完成,定时任务需要邮件配置才能发送报告。\n\n是否继续保存定时任务配置(不保存邮件配置)?')
+  ) {
+    return;
+  }
+
+  // Only include the e-mail section when it is usable.
+  const fullConfig = { scheduler: schedulerConfig };
+  if (emailReady) {
+    fullConfig.email = emailConfig;
+  }
+
+  showSchedulerStatus('正在保存配置...', 'info');
+
+  try {
+    const response = await fetch(`${API_BASE}/config`, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify(fullConfig),
+    });
+    const data = await response.json();
+
+    if (data.success) {
+      showSchedulerStatus('配置已保存,定时任务已重新加载!', 'success');
+      await updateSchedulerStatus();
+    } else {
+      showSchedulerStatus(`保存失败: ${data.error}`, 'error');
+    }
+  } catch (error) {
+    showSchedulerStatus(`请求失败: ${error.message}`, 'error');
+  }
+}
+
+/**
+ * Ask for confirmation, then trigger the scheduled task once through
+ * POST {API_BASE}/run-scheduled-task and report the outcome in the status area.
+ */
+async function testSchedulerNow() {
+  const confirmed = confirm('确定要立即执行定时任务吗?\n\n这将采集本月大于阈值的项目并发送邮件,可能需要几分钟时间。');
+  if (!confirmed) {
+    return;
+  }
+
+  showSchedulerStatus('正在后台执行定时任务,请稍候...', 'info');
+
+  try {
+    const res = await fetch(`${API_BASE}/run-scheduled-task`, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+    });
+    const payload = await res.json();
+
+    if (!payload.success) {
+      showSchedulerStatus(`执行失败: ${payload.error}`, 'error');
+      return;
+    }
+    showSchedulerStatus('定时任务已在后台开始执行,完成后将发送邮件。请查看服务器控制台日志了解进度。', 'success');
+  } catch (error) {
+    showSchedulerStatus(`请求失败: ${error.message}`, 'error');
+  }
+}
+
+/**
+ * Show a status message in the scheduler configuration panel.
+ *
+ * The message is inserted as text (never as HTML) because callers pass server
+ * responses and error messages. Success messages disappear after 3 seconds,
+ * unless a newer message has replaced them in the meantime.
+ *
+ * @param {string} message - Text to display.
+ * @param {'success'|'error'|'info'} type - Selects the colour scheme; unknown
+ *   values fall back to the "info" colours.
+ */
+function showSchedulerStatus(message, type) {
+  const statusDiv = document.getElementById('schedulerConfigStatus');
+  const bgColors = {
+    success: '#d4edda',
+    error: '#f8d7da',
+    info: '#d1ecf1',
+  };
+  const textColors = {
+    success: '#155724',
+    error: '#721c24',
+    info: '#0c5460',
+  };
+  const scheme = Object.hasOwn(bgColors, type) ? type : 'info';
+
+  const box = document.createElement('div');
+  box.style.backgroundColor = bgColors[scheme];
+  box.style.color = textColors[scheme];
+  box.textContent = message;
+  statusDiv.replaceChildren(box);
+
+  if (type === 'success') {
+    setTimeout(() => {
+      // Do not wipe a message that was shown after this one.
+      if (statusDiv.firstChild === box) {
+        statusDiv.replaceChildren();
+      }
+    }, 3000);
+  }
+}
diff --git a/public/index.html b/public/index.html
index 3ff1b9c..2be93f4 100644
--- a/public/index.html
+++ b/public/index.html
@@ -342,6 +342,7 @@
+
@@ -470,6 +471,118 @@
+
+
+
定时任务配置
+
配置定时任务自动采集本月大于指定金额的项目并发送邮件报告
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 今日:今天 | 本周:本周一至今 | 本月:本月1日至今
+
+
+
+
+
+
+
+ 10亿 = 100000万元 | 5亿 = 50000万元 | 1亿 = 10000万元
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
使用说明
+
+ - 定时任务功能: 自动采集选中时间段的所有公告
+ - 筛选条件: 只保留预算金额大于设定阈值的项目
+ - 邮件发送: 自动将筛选结果生成HTML报告并发送到配置的邮箱
+ - 执行时间: 通过下拉菜单或自定义时间设置定时执行时间
+ - 注意事项: 保存配置后会自动重启定时任务,无需重启服务器
+
+
+
+
邮件配置
diff --git a/src/scheduler.js b/src/scheduler.js
new file mode 100644
index 0000000..4005e7d
--- /dev/null
+++ b/src/scheduler.js
@@ -0,0 +1,631 @@
+import cron from 'node-cron';
+import { readFileSync } from 'fs';
+import { fileURLToPath } from 'url';
+import { dirname, join } from 'path';
+import axios from 'axios';
+import * as cheerio from 'cheerio';
+import iconv from 'iconv-lite';
+import { sendReportEmail } from './emailService.js';
+
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = dirname(__filename);
+
+/**
+ * Read and parse ../config.json (relative to this module).
+ *
+ * @returns {object|null} The parsed configuration, or null when the file is
+ *   missing, unreadable or not valid JSON (the reason is logged).
+ */
+function loadConfig() {
+  const configPath = join(__dirname, '..', 'config.json');
+  try {
+    return JSON.parse(readFileSync(configPath, 'utf-8'));
+  } catch (error) {
+    console.error('加载配置文件失败:', error.message);
+    console.error('请确保 config.json 文件存在并配置正确');
+    return null;
+  }
+}
+
+/**
+ * Compute the crawl window for a time-range keyword.
+ *
+ * Calendar dates are evaluated in Asia/Shanghai, the same time zone the cron
+ * job is scheduled in, so "today" does not depend on the host's local zone.
+ *
+ * @param {string} timeRange - 'today', 'thisWeek' (Monday through today) or
+ *   'thisMonth'; any other value is treated as 'thisMonth'.
+ * @param {Date} [now=new Date()] - Reference instant (injectable for tests).
+ * @returns {{startDate: string, endDate: string}} Dates as YYYY-MM-DD;
+ *   endDate is always the current day.
+ */
+function getDateRangeByType(timeRange, now = new Date()) {
+  const parts = new Intl.DateTimeFormat('en-CA', {
+    timeZone: 'Asia/Shanghai',
+    year: 'numeric',
+    month: '2-digit',
+    day: '2-digit',
+  }).formatToParts(now);
+  const pick = (type) => parts.find((part) => part.type === type).value;
+  const year = pick('year');
+  const month = pick('month');
+  const day = pick('day');
+
+  const endDate = `${year}-${month}-${day}`;
+  let startDate;
+
+  switch (timeRange) {
+    case 'today':
+      startDate = endDate;
+      break;
+
+    case 'thisWeek': {
+      // Do the calendar arithmetic in UTC on the Shanghai calendar date so the
+      // host time zone cannot shift the result.
+      const monday = new Date(Date.UTC(Number(year), Number(month) - 1, Number(day)));
+      const dayOfWeek = monday.getUTCDay(); // 0 = Sunday, 1 = Monday
+      const daysSinceMonday = dayOfWeek === 0 ? 6 : dayOfWeek - 1;
+      monday.setUTCDate(monday.getUTCDate() - daysSinceMonday);
+      startDate = monday.toISOString().slice(0, 10);
+      break;
+    }
+
+    case 'thisMonth':
+    default:
+      startDate = `${year}-${month}-01`;
+      break;
+  }
+
+  return { startDate, endDate };
+}
+
+/**
+ * Date range from the first day of the current month through today.
+ * Kept for compatibility with older code.
+ *
+ * @returns {{startDate: string, endDate: string}}
+ */
+function getCurrentMonthDateRange() {
+  const monthRange = getDateRangeByType('thisMonth');
+  return monthRange;
+}
+
+// Helper code copied from server.js.
+// Base URL of the announcement list; page 0 is this URL itself (see getPageUrl).
+const BASE_URL = 'https://gjzx.nanjing.gov.cn/gggs/';
+
+// Shared axios client. Responses are requested as raw bytes (arraybuffer)
+// because fetchHtml decodes them with iconv using the charset from the
+// Content-Type header. Requests time out after 10 seconds.
+const http = axios.create({
+ responseType: 'arraybuffer',
+ timeout: 10000,
+ headers: {
+ 'User-Agent': 'Mozilla/5.0 (compatible; gjzx-scraper/1.0)',
+ },
+});
+
+/**
+ * Choose the text encoding to decode a response with.
+ *
+ * @param {string} [contentType=''] - Value of the Content-Type header.
+ * @returns {string} 'utf-8' when no charset is declared, 'gbk' for any charset
+ *   containing "gb" (gb2312, gbk, gb18030), otherwise the lower-cased charset.
+ */
+function pickEncoding(contentType = '') {
+  const found = /charset=([^;]+)/i.exec(contentType);
+  if (found === null) {
+    return 'utf-8';
+  }
+  const declared = found[1].trim().toLowerCase();
+  return declared.includes('gb') ? 'gbk' : declared;
+}
+
+/**
+ * Download a page and decode it to a string using the charset announced in
+ * the Content-Type header (UTF-8 when none is given).
+ *
+ * @param {string} url - Page to fetch.
+ * @returns {Promise<string>} Decoded HTML.
+ */
+async function fetchHtml(url) {
+  const { data, headers } = await http.get(url);
+  const charset = pickEncoding(headers['content-type']) || 'utf-8';
+  return iconv.decode(data, charset);
+}
+
+/**
+ * Build the URL of a list page.
+ *
+ * @param {number} pageIndex - Zero-based page index; 0 is the base URL itself.
+ * @param {string} [baseUrl=BASE_URL] - List root, with or without a trailing slash.
+ * @returns {string} `baseUrl` for page 0, otherwise `<base>/index_<pageIndex>.html`.
+ */
+function getPageUrl(pageIndex, baseUrl = BASE_URL) {
+  return pageIndex === 0
+    ? baseUrl
+    : `${baseUrl.replace(/\/$/, '')}/index_${pageIndex}.html`;
+}
+
+/**
+ * Extract announcement entries from a list page.
+ *
+ * A table row is kept when its first cell holds a link with a title of at
+ * least 5 characters and a real href, and its second cell is a YYYY-MM-DD date.
+ *
+ * @param {string} html - List page HTML.
+ * @returns {Array<{title: string, href: string, date: string}>} Entries with
+ *   hrefs resolved against BASE_URL.
+ */
+function parseList(html) {
+  const $ = cheerio.load(html);
+  const collected = [];
+
+  $('table tr').each((_, tr) => {
+    const anchor = $(tr).find('td:first-child a').first();
+    const dateTd = $(tr).find('td:nth-child(2)');
+    if (!anchor.length || !dateTd.length) {
+      return;
+    }
+
+    const title = anchor.attr('title') || anchor.text().trim();
+    const rawHref = anchor.attr('href') || '';
+    const date = dateTd.text().trim();
+
+    const unusable =
+      !rawHref ||
+      !title ||
+      title.length < 5 ||
+      rawHref === './' ||
+      rawHref === '../' ||
+      !/^\d{4}-\d{2}-\d{2}$/.test(date);
+    if (unusable) {
+      return;
+    }
+
+    try {
+      collected.push({ title, href: new URL(rawHref, BASE_URL).toString(), date });
+    } catch (err) {
+      // Unresolvable href: skip this row.
+    }
+  });
+
+  return collected;
+}
+
+/**
+ * Check whether a date string lies within an inclusive range.
+ *
+ * @param {string} dateStr - Date to test; empty or unparseable values are out of range.
+ * @param {string} [startDate] - Lower bound; falsy means unbounded.
+ * @param {string} [endDate] - Upper bound; falsy means unbounded.
+ * @returns {boolean}
+ */
+function isDateInRange(dateStr, startDate, endDate) {
+  if (!dateStr) {
+    return false;
+  }
+  const candidate = new Date(dateStr);
+  if (isNaN(candidate.getTime())) {
+    return false;
+  }
+
+  const tooEarly = Boolean(startDate) && candidate < new Date(startDate);
+  const tooLate = Boolean(endDate) && candidate > new Date(endDate);
+  return !(tooEarly || tooLate);
+}
+
+/**
+ * Walk the list pages from the first one and collect the entries whose date
+ * falls inside [startDate, endDate].
+ *
+ * Crawling stops at the first page that is empty, fails to load, or (when a
+ * start date is given) contains only entries older than the start date. There
+ * is a 500 ms pause between pages.
+ *
+ * @param {string} startDate - Inclusive lower bound (YYYY-MM-DD); falsy = unbounded.
+ * @param {string} endDate - Inclusive upper bound (YYYY-MM-DD); falsy = unbounded.
+ * @param {number} [maxPages=23] - Maximum number of pages to visit.
+ * @returns {Promise<Array<{title: string, href: string, date: string}>>}
+ */
+async function fetchListByDateRange(startDate, endDate, maxPages = 23) {
+  const matched = [];
+  let pageIndex = 0;
+  let reachedOlderPages = false;
+
+  console.log(`开始按时间范围采集: ${startDate || '不限'} 至 ${endDate || '不限'}`);
+
+  while (!reachedOlderPages && pageIndex < maxPages) {
+    const pageUrl = getPageUrl(pageIndex);
+    console.log(`正在采集第 ${pageIndex + 1} 页: ${pageUrl}`);
+
+    try {
+      const items = parseList(await fetchHtml(pageUrl));
+
+      if (items.length === 0) {
+        console.log(`第 ${pageIndex + 1} 页没有数据,停止采集`);
+        break;
+      }
+
+      const inRange = items.filter((item) => isDateInRange(item.date, startDate, endDate));
+      matched.push(...inRange);
+
+      // The list is walked from page 0 onwards; once a whole page predates
+      // the start date there is no point in fetching further pages.
+      const wholePageTooOld =
+        Boolean(startDate) &&
+        items.every((item) => new Date(item.date) < new Date(startDate));
+      if (wholePageTooOld) {
+        console.log(`第 ${pageIndex + 1} 页所有项目都早于起始日期,停止采集`);
+        reachedOlderPages = true;
+      }
+
+      console.log(`第 ${pageIndex + 1} 页找到 ${items.length} 条,符合条件 ${inRange.length > 0 ? '有' : '无'}`);
+
+      pageIndex += 1;
+
+      if (!reachedOlderPages && pageIndex < maxPages) {
+        await new Promise((resolve) => setTimeout(resolve, 500));
+      }
+    } catch (err) {
+      console.error(`采集第 ${pageIndex + 1} 页失败: ${err.message}`);
+      break;
+    }
+  }
+
+  console.log(`总共采集了 ${pageIndex} 页,找到 ${matched.length} 条符合条件的公告`);
+  return matched;
+}
+
+/**
+ * Parse an announcement detail page (logic mirrored from server.js).
+ *
+ * @param {string} html - Detail page HTML.
+ * @returns {{title: string, publishTime: string, content: string, budget: object|null}}
+ *   The title, the publish time ('' when not found), the longest text found
+ *   among the known content containers, and the budget extracted from it.
+ */
+function parseDetail(html) {
+  const $ = cheerio.load(html);
+
+  // Title: try the known layouts in order.
+  const title =
+    $('.title18').text().trim() ||
+    $('.article-info h1').text().trim() ||
+    $('h1').first().text().trim();
+
+  // Publish time: first the cell holding both "发布部门" and "发布时间" ...
+  const publishCellText = $('td:contains("发布部门")')
+    .filter((_, el) => $(el).text().includes('发布时间'))
+    .text()
+    .trim();
+  const fullStamp = publishCellText.match(/(\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2})/);
+  let publishTime = fullStamp ? fullStamp[1] : '';
+
+  // ... then a minute-precision timestamp from the source line or whole body.
+  if (!publishTime) {
+    const infoText = $('.info-sources').text() || $('body').text();
+    const shortStamp = infoText.match(/(\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2})/);
+    publishTime = shortStamp ? shortStamp[1] : '';
+  }
+
+  // Content: the longest text among the candidate containers.
+  const candidateSelectors = [
+    '.zhenwen td',
+    '.con',
+    '.article-content',
+    '.ewb-article-content',
+    'body',
+  ];
+  const content = candidateSelectors.reduce((longest, selector) => {
+    const node = $(selector).first();
+    if (node.length === 0) {
+      return longest;
+    }
+    const text = node.text().trim();
+    return text.length > longest.length ? text : longest;
+  }, '');
+
+  return {
+    title,
+    publishTime,
+    content,
+    budget: extractBudget(content),
+  };
+}
+
+/**
+ * Find the budget amount mentioned in an announcement text.
+ *
+ * Patterns are tried from most to least specific and the first one yielding a
+ * plausible amount (0.01 to 100000000 万元) wins. Amounts written in 元 are
+ * converted to 万元.
+ *
+ * @param {string} content - Announcement text (HTML text or extracted PDF text).
+ * @returns {{amount: number, unit: string, text: string, originalUnit: string}|null}
+ *   The amount in 万元 with the matched text, or null when nothing usable is
+ *   found or `content` is not a non-empty string.
+ */
+function extractBudget(content) {
+  // PDF extraction may yield undefined or empty text; treat that as "no budget".
+  if (typeof content !== 'string' || content === '') {
+    return null;
+  }
+
+  // Re-join numbers that were split across lines.
+  const cleanedContent = content.replace(/(\d)\s*[\n\r]\s*(?=\d)/g, '$1');
+
+  // Ordered by priority; `divider` converts 元 to 万元.
+  const patterns = [
+    { regex: /(?:¥|¥|人民币)\s*([\d,,]+(?:\.\d+)?)\s*万元/i },
+    { regex: /[((][¥¥]([\d,,]+(?:\.\d+)?)[))]/i, divider: 10000 },
+    { regex: /([\d,,]+(?:\.\d+)?)\s*万元/i },
+    { regex: /(?:¥|¥|人民币)\s*([\d,,]+(?:\.\d+)?)\s*元/i, divider: 10000 },
+    { regex: /([\d,,]+(?:\.\d+)?)\s*元(?!整)/i, divider: 10000 },
+  ];
+
+  for (const { regex, divider } of patterns) {
+    const match = cleanedContent.match(regex);
+    if (!match) {
+      continue;
+    }
+
+    const parsed = Number.parseFloat(match[1].replace(/[,,]/g, ''));
+    const amount = divider ? parsed / divider : parsed;
+    if (Number.isNaN(amount) || amount < 0.01 || amount > 100000000) {
+      continue; // Implausible value: fall through to the next pattern.
+    }
+
+    return {
+      amount,
+      unit: '万元',
+      text: match[0],
+      originalUnit: divider ? '元' : '万元',
+    };
+  }
+
+  return null;
+}
+
+/**
+ * Ask the bulletin API for the signed PDF URL of an announcement page.
+ *
+ * The bulletin id is taken from the `bulletinDetails/<x>/<id>` path segment and
+ * the type from the `bulletinType` query parameter (default '1'). This is
+ * best effort: any failure yields null.
+ *
+ * @param {string} pageUrl - Announcement page URL.
+ * @returns {Promise<string|null>} The signed PDF URL, or null.
+ */
+async function fetchPdfUrlFromApi(pageUrl) {
+  try {
+    const idMatch = pageUrl.match(/bulletinDetails\/[^\/]+\/([a-f0-9]+)/i);
+    const typeMatch = pageUrl.match(/bulletinType=(\d+)/);
+    if (!idMatch) {
+      return null;
+    }
+
+    const [, bulletinId] = idMatch;
+    const bulletinType = typeMatch ? typeMatch[1] : '1';
+    const apiUrl = `https://api.jszbtb.com/DataGatewayApi/PublishBulletin/BulletinType/${bulletinType}/ID/${bulletinId}`;
+
+    const { data: rawBody } = await http.get(apiUrl, {
+      headers: {
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
+        'Accept': 'application/json',
+        'Referer': 'https://www.jszbcg.com/'
+      },
+      responseType: 'arraybuffer'
+    });
+
+    const payload = JSON.parse(iconv.decode(rawBody, 'utf-8'));
+    const hasPdf = payload.success && payload.data && payload.data.signedPdfUrl;
+    return hasPdf ? payload.data.signedPdfUrl : null;
+  } catch (err) {
+    return null;
+  }
+}
+
+/**
+ * Find the PDF embedded in a page through a viewer iframe
+ * (`<iframe src="...viewer...?file=<encoded pdf url>">`).
+ *
+ * Every iframe is inspected and the first one carrying a usable `file`
+ * parameter wins; iframes without it (or with a malformed value) are skipped
+ * instead of aborting the search.
+ *
+ * @param {string} html - Page HTML.
+ * @param {string} pageUrl - URL of the page, used to resolve relative PDF paths.
+ * @returns {string|null} PDF URL (absolute when it can be resolved), or null.
+ */
+function extractPdfUrl(html, pageUrl) {
+  const $ = cheerio.load(html);
+
+  for (const frame of $('iframe').toArray()) {
+    const src = $(frame).attr('src');
+    const match = src ? src.match(/[?&]file=([^&]+)/) : null;
+    if (!match) {
+      continue;
+    }
+
+    try {
+      // decodeURIComponent throws on malformed escapes; new URL throws when
+      // the value cannot be resolved against pageUrl.
+      const pdfUrl = decodeURIComponent(match[1]);
+      if (pdfUrl.startsWith('http://') || pdfUrl.startsWith('https://')) {
+        return pdfUrl;
+      }
+      return new URL(pdfUrl, pageUrl).toString();
+    } catch {
+      continue;
+    }
+  }
+
+  return null;
+}
+
+/**
+ * Download a PDF and return its text content.
+ *
+ * @param {string} pdfUrl - Absolute URL of the PDF.
+ * @returns {Promise<string>} Text extracted by pdf-parse.
+ * @throws Propagates download and parsing errors to the caller.
+ */
+async function fetchPdfContent(pdfUrl) {
+  const { PDFParse } = await import('pdf-parse');
+
+  const response = await http.get(pdfUrl, {
+    responseType: 'arraybuffer',
+    timeout: 30000, // PDFs can be large; allow more than the default 10 s.
+  });
+
+  const parser = new PDFParse({ data: response.data });
+  try {
+    const result = await parser.getText();
+    return result.text;
+  } finally {
+    // Release the parser even when text extraction fails.
+    await parser.destroy();
+  }
+}
+
+/**
+ * Parse an announcement page, preferring the text of its attached PDF.
+ *
+ * The PDF URL comes from the bulletin API for jszbcg.com pages, otherwise from
+ * a viewer iframe. When no PDF is found, it cannot be read, or it contains no
+ * text, the HTML content is used instead.
+ *
+ * @param {string} html - Detail page HTML.
+ * @param {string} pageUrl - URL the HTML was fetched from.
+ * @returns {Promise<object>} The parseDetail() fields with `content` and
+ *   `budget` taken from the chosen source, plus `hasPdf` and `pdfUrl`
+ *   (null unless PDF text was actually used).
+ */
+async function parseDetailEnhanced(html, pageUrl) {
+  // Parse the HTML once; it provides title/publishTime and the fallback content.
+  const basicInfo = parseDetail(html);
+
+  let pdfUrl = null;
+  if (pageUrl.includes('jszbcg.com')) {
+    pdfUrl = await fetchPdfUrlFromApi(pageUrl);
+  }
+  if (!pdfUrl) {
+    pdfUrl = extractPdfUrl(html, pageUrl);
+  }
+
+  let content = basicInfo.content;
+  let pdfParsed = false;
+
+  if (pdfUrl) {
+    try {
+      const pdfText = await fetchPdfContent(pdfUrl);
+      // Scanned PDFs may yield no text at all; keep the HTML content then.
+      if (typeof pdfText === 'string' && pdfText.trim() !== '') {
+        content = pdfText;
+        pdfParsed = true;
+      }
+    } catch (err) {
+      console.warn(`PDF解析失败,改用网页正文: ${err.message}`);
+    }
+  }
+
+  return {
+    ...basicInfo,
+    content,
+    budget: pdfParsed ? extractBudget(content) : basicInfo.budget,
+    hasPdf: pdfParsed,
+    pdfUrl: pdfParsed ? pdfUrl : null,
+  };
+}
+
+/**
+ * Run one collection cycle: crawl the list pages for the configured time
+ * range, fetch every announcement's detail, keep the projects whose budget is
+ * above the threshold and e-mail the resulting report.
+ *
+ * Errors are logged and swallowed, so the returned promise never rejects.
+ * The cron callback and the manual trigger rely on that.
+ *
+ * @param {object} config - Parsed config.json with `scheduler` and `email` sections.
+ * @returns {Promise<void>}
+ */
+async function executeScheduledTask(config) {
+  try {
+    console.log('========================================');
+    console.log('定时任务开始执行');
+    console.log('执行时间:', new Date().toLocaleString('zh-CN'));
+    console.log('========================================');
+
+    const schedulerConfig = config?.scheduler ?? {};
+    const emailConfig = config?.email;
+
+    // Fail fast: without mail settings the crawl (which can take minutes)
+    // could never deliver its report. A manual run does not pass through the
+    // checks in initScheduler.
+    if (!emailConfig || !emailConfig.smtpHost || !emailConfig.smtpUser) {
+      console.error('定时任务终止: 邮件配置不完整,无法发送报告');
+      return;
+    }
+
+    const timeRange = schedulerConfig.timeRange || 'thisMonth';
+    const { startDate, endDate } = getDateRangeByType(timeRange);
+    const threshold = schedulerConfig.threshold || 100000; // default: 100000 万元 (10 亿)
+
+    const timeRangeNames = {
+      'today': '今日',
+      'thisWeek': '本周',
+      'thisMonth': '本月'
+    };
+    // Unknown range keywords are treated as 'thisMonth' by getDateRangeByType.
+    const rangeLabel = timeRangeNames[timeRange] || '本月';
+    console.log(`采集时间段: ${rangeLabel}`);
+    console.log(`采集时间范围: ${startDate} 至 ${endDate}`);
+    console.log(`金额阈值: ${threshold}万元 (${threshold / 10000}亿元)`);
+
+    // 1. Collect the list entries within the date range.
+    const items = await fetchListByDateRange(startDate, endDate, 23);
+
+    if (items.length === 0) {
+      console.log(`${rangeLabel}暂无公告数据`);
+      return;
+    }
+
+    // 2. Fetch the details one by one; a failed item is recorded, not fatal.
+    console.log('========================================');
+    console.log(`开始采集 ${items.length} 条公告的详情...`);
+    const results = [];
+    for (let i = 0; i < items.length; i++) {
+      const item = items[i];
+      try {
+        console.log(`[${i + 1}/${items.length}] 正在采集: ${item.title}`);
+        const html = await fetchHtml(item.href);
+        const detail = await parseDetailEnhanced(html, item.href);
+        results.push({
+          ...item,
+          detail,
+        });
+        // Be polite to the remote site.
+        await new Promise((resolve) => setTimeout(resolve, 500));
+      } catch (err) {
+        console.error(`采集失败: ${err.message}`);
+        results.push({
+          ...item,
+          detail: null,
+          error: err.message,
+        });
+      }
+    }
+
+    // 3. Keep only the projects strictly above the threshold.
+    const filtered = results.filter((item) => {
+      return item.detail?.budget && item.detail.budget.amount > threshold;
+    });
+
+    console.log('========================================');
+    console.log(`筛选结果: 找到 ${filtered.length} 个大于 ${threshold}万元 的项目`);
+
+    if (filtered.length === 0) {
+      console.log(`${rangeLabel}暂无符合条件的大额项目`);
+      return;
+    }
+
+    const total = filtered.reduce(
+      (sum, item) => sum + (item.detail.budget?.amount || 0),
+      0
+    );
+
+    // 4. Build the report.
+    const report = {
+      summary: {
+        total_count: results.length,
+        filtered_count: filtered.length,
+        threshold: `${threshold}万元`,
+        total_amount: `${total.toFixed(2)}万元`,
+        generated_at: new Date().toISOString(),
+        date_range: { startDate, endDate },
+      },
+      projects: filtered.map((item) => ({
+        title: item.title,
+        date: item.date,
+        publish_time: item.detail.publishTime,
+        budget: item.detail.budget,
+        url: item.href,
+      })),
+    };
+
+    // 5. Send it.
+    console.log('========================================');
+    console.log('正在发送邮件报告...');
+
+    const result = await sendReportEmail(emailConfig, report);
+
+    console.log('邮件发送成功!');
+    console.log('收件人:', emailConfig.recipients);
+    console.log('MessageId:', result.messageId);
+    console.log('========================================');
+    console.log('定时任务执行完成');
+    console.log('========================================');
+
+  } catch (error) {
+    console.error('========================================');
+    console.error('定时任务执行失败:', error.message);
+    console.error(error.stack);
+    console.error('========================================');
+  }
+}
+
+// The currently scheduled cron task, or null when nothing is scheduled.
+let currentScheduledTask = null;
+
+/**
+ * (Re)create the cron task from config.json.
+ *
+ * Any previously scheduled task is stopped first, so calling this again after
+ * the job was disabled or misconfigured never leaves a stale task running.
+ * Nothing is scheduled when the config cannot be loaded, the scheduler is
+ * disabled, the mail settings are incomplete or the cron expression is
+ * invalid; the reason is logged and the function returns normally.
+ */
+export function initScheduler() {
+  if (currentScheduledTask) {
+    currentScheduledTask.stop();
+    currentScheduledTask = null;
+    console.log('已停止旧的定时任务');
+  }
+
+  const config = loadConfig();
+
+  if (!config) {
+    console.error('无法启动定时任务: 配置文件加载失败');
+    return;
+  }
+
+  if (!config.scheduler || !config.scheduler.enabled) {
+    console.log('定时任务已禁用');
+    return;
+  }
+
+  if (!config.email || !config.email.smtpHost || !config.email.smtpUser) {
+    console.error('无法启动定时任务: 邮件配置不完整');
+    console.error('请在 config.json 中配置邮件信息');
+    return;
+  }
+
+  const cronTime = config.scheduler.cronTime || '0 9 * * *';
+
+  // cron.schedule throws on an invalid expression. Reject it here so that a
+  // bad value saved through the web UI cannot take the server down.
+  if (typeof cronTime !== 'string' || !cron.validate(cronTime)) {
+    console.error('无法启动定时任务: Cron 表达式无效:', cronTime);
+    return;
+  }
+
+  currentScheduledTask = cron.schedule(cronTime, () => {
+    // executeScheduledTask handles its own errors; the catch is a safety net
+    // against unhandled rejections.
+    executeScheduledTask(config).catch((err) => {
+      console.error('定时任务执行失败:', err);
+    });
+  }, {
+    timezone: 'Asia/Shanghai'
+  });
+
+  // Log only after the task really exists.
+  console.log('========================================');
+  console.log('定时任务已启动');
+  console.log('执行计划:', cronTime);
+  console.log('金额阈值:', config.scheduler.threshold, '万元');
+  console.log('收件人:', config.email.recipients);
+  console.log('========================================');
+}
+
+/**
+ * Stop the running task (if any) and schedule a new one from the current
+ * contents of config.json.
+ */
+export function reloadScheduler() {
+  console.log('重新加载定时任务配置...');
+
+  const runningTask = currentScheduledTask;
+  if (runningTask) {
+    runningTask.stop();
+    currentScheduledTask = null;
+    console.log('已停止当前定时任务');
+  }
+
+  initScheduler();
+}
+
+/**
+ * Stop the scheduled task.
+ *
+ * @returns {boolean} true when a task was running and has been stopped,
+ *   false when there was nothing to stop.
+ */
+export function stopScheduler() {
+  if (!currentScheduledTask) {
+    return false;
+  }
+
+  currentScheduledTask.stop();
+  currentScheduledTask = null;
+  console.log('定时任务已停止');
+  return true;
+}
+
+/**
+ * Report whether a task is scheduled, together with the scheduler settings
+ * read from config.json.
+ *
+ * @returns {{isRunning: boolean, config: {enabled: boolean, cronTime: string, threshold: number}|null}}
+ *   `config` is null when config.json cannot be loaded.
+ */
+export function getSchedulerStatus() {
+  const loaded = loadConfig();
+
+  let schedulerSummary = null;
+  if (loaded) {
+    schedulerSummary = {
+      enabled: loaded.scheduler?.enabled || false,
+      cronTime: loaded.scheduler?.cronTime || '0 9 * * *',
+      threshold: loaded.scheduler?.threshold || 100000,
+    };
+  }
+
+  return {
+    isRunning: currentScheduledTask !== null,
+    config: schedulerSummary,
+  };
+}
+
+/**
+ * Run the collection task once, immediately (used for manual testing).
+ *
+ * @returns {Promise<void>}
+ * @throws {Error} When config.json cannot be loaded.
+ */
+export async function runTaskNow() {
+  const loaded = loadConfig();
+  if (loaded) {
+    await executeScheduledTask(loaded);
+    return;
+  }
+  throw new Error('配置文件加载失败');
+}
diff --git a/src/server.js b/src/server.js
index 2afd464..d27416d 100644
--- a/src/server.js
+++ b/src/server.js
@@ -4,6 +4,7 @@ import axios from 'axios';
import * as cheerio from 'cheerio';
import iconv from 'iconv-lite';
import { sendReportEmail } from './emailService.js';
+import { initScheduler, runTaskNow, reloadScheduler, getSchedulerStatus } from './scheduler.js';
const app = express();
const PORT = 3000;
@@ -732,6 +733,99 @@ app.post('/api/test-pdf', async (req, res) => {
}
});
+// GET /api/config: return config.json with the SMTP password masked.
+app.get('/api/config', async (req, res) => {
+  try {
+    const { readFileSync } = await import('fs');
+    const { join, dirname } = await import('path');
+    const { fileURLToPath } = await import('url');
+
+    const moduleDir = dirname(fileURLToPath(import.meta.url));
+    const configPath = join(moduleDir, '..', 'config.json');
+    const config = JSON.parse(readFileSync(configPath, 'utf-8'));
+
+    // Never send the real password to the browser.
+    const hasPassword = config.email && config.email.smtpPass;
+    if (hasPassword) {
+      config.email.smtpPass = '***已配置***';
+    }
+
+    res.json({ success: true, data: config });
+  } catch (error) {
+    res.status(500).json({ success: false, error: error.message });
+  }
+});
+
+// POST /api/config: merge the submitted settings into config.json and
+// restart the scheduler.
+//
+// The request body is validated and merged section by section into the
+// existing file, so a request that omits `email` (or sends it empty) does not
+// erase the stored mail settings. A masked password placeholder is replaced
+// by the stored password.
+app.post('/api/config', async (req, res) => {
+  try {
+    const { writeFileSync, readFileSync } = await import('fs');
+    const { join, dirname } = await import('path');
+    const { fileURLToPath } = await import('url');
+
+    const moduleDir = dirname(fileURLToPath(import.meta.url));
+    const configPath = join(moduleDir, '..', 'config.json');
+    const PASSWORD_MASK = '***已配置***';
+    const isPlainObject = (value) =>
+      value !== null && typeof value === 'object' && !Array.isArray(value);
+
+    const incoming = req.body;
+    if (!isPlainObject(incoming)) {
+      res.status(400).json({ success: false, error: '请求体必须是 JSON 对象' });
+      return;
+    }
+    for (const section of ['scheduler', 'email']) {
+      if (incoming[section] !== undefined && !isPlainObject(incoming[section])) {
+        res.status(400).json({ success: false, error: `${section} 必须是对象` });
+        return;
+      }
+    }
+
+    // Reject obviously malformed cron expressions before they reach the disk.
+    const cronTime = incoming.scheduler?.cronTime;
+    if (cronTime !== undefined) {
+      const fieldCount = typeof cronTime === 'string' ? cronTime.trim().split(/\s+/).length : 0;
+      if (fieldCount !== 5 && fieldCount !== 6) {
+        res.status(400).json({ success: false, error: 'Cron表达式格式错误' });
+        return;
+      }
+    }
+
+    // A missing or unreadable file simply means there is nothing to merge with.
+    let oldConfig = {};
+    try {
+      const parsed = JSON.parse(readFileSync(configPath, 'utf-8'));
+      if (isPlainObject(parsed)) {
+        oldConfig = parsed;
+      }
+    } catch (readError) {
+      console.warn('读取旧配置失败,将创建新配置:', readError.message);
+    }
+
+    const oldScheduler = isPlainObject(oldConfig.scheduler) ? oldConfig.scheduler : {};
+    const oldEmail = isPlainObject(oldConfig.email) ? oldConfig.email : {};
+    const mergedConfig = {
+      ...oldConfig,
+      ...incoming,
+      scheduler: { ...oldScheduler, ...incoming.scheduler },
+      email: { ...oldEmail, ...incoming.email },
+    };
+
+    // The browser only ever sees the mask; keep the real password in that case.
+    if (mergedConfig.email.smtpPass === PASSWORD_MASK) {
+      if (oldEmail.smtpPass !== undefined) {
+        mergedConfig.email.smtpPass = oldEmail.smtpPass;
+      } else {
+        delete mergedConfig.email.smtpPass;
+      }
+    }
+
+    writeFileSync(configPath, JSON.stringify(mergedConfig, null, 2), 'utf-8');
+
+    // Apply the new settings without restarting the server.
+    reloadScheduler();
+
+    res.json({ success: true, message: '配置已保存并重新加载定时任务' });
+  } catch (error) {
+    res.status(500).json({ success: false, error: error.message });
+  }
+});
+
+// GET /api/scheduler/status: report whether a task is scheduled and with
+// which settings.
+app.get('/api/scheduler/status', async (req, res) => {
+  try {
+    res.json({ success: true, data: getSchedulerStatus() });
+  } catch (error) {
+    res.status(500).json({ success: false, error: error.message });
+  }
+});
+
+// POST /api/run-scheduled-task: start the scheduled task once, right now
+// (used for testing). The task keeps running in the background after the
+// response has been sent.
+app.post('/api/run-scheduled-task', async (req, res) => {
+  try {
+    console.log('手动触发定时任务...');
+
+    // Deliberately not awaited: the task may take minutes.
+    runTaskNow().catch((err) => {
+      console.error('定时任务执行失败:', err);
+    });
+
+    const body = { success: true, message: '定时任务已触发,正在后台执行...' };
+    res.json(body);
+  } catch (error) {
+    const failure = { success: false, error: error.message };
+    res.status(500).json(failure);
+  }
+});
+
app.listen(PORT, () => {
console.log(`Server running at http://localhost:${PORT}`);
+
+ // Start the cron scheduler once the HTTP server is listening.
+ console.log('正在初始化定时任务...');
+ initScheduler();
});