diff --git a/config.example.json b/config.example.json
new file mode 100644
index 0000000..bc04c4d
--- /dev/null
+++ b/config.example.json
@@ -0,0 +1,21 @@
+{
+  "scheduler": {
+    "enabled": true,
+    "cronTime": "0 9 * * *",
+    "threshold": 100000,
+    "description": "每天9点采集大于10亿的项目",
+    "timeRange": "thisMonth",
+    "pushRules": {
+      "urgentThreshold": 500000,
+      "urgentPush": false,
+      "summaryPush": true
+    }
+  },
+  "email": {
+    "smtpHost": "smtp.example.com",
+    "smtpPort": 587,
+    "smtpUser": "your-email@example.com",
+    "smtpPass": "your-password",
+    "recipients": "recipient1@example.com,recipient2@example.com"
+  }
+}
diff --git a/config.json b/config.json
new file mode 100644
index 0000000..731d3c6
--- /dev/null
+++ b/config.json
@@ -0,0 +1,16 @@
+{
+  "scheduler": {
+    "enabled": false,
+    "cronTime": "0 9 * * *",
+    "threshold": 100000,
+    "description": "每天9点采集大于10亿的项目",
+    "timeRange": "today"
+  },
+  "email": {
+    "smtpHost": "smtp.qq.com",
+    "smtpPort": 587,
+    "smtpUser": "your-email@example.com",
+    "smtpPass": "your-password",
+    "recipients": "recipient@example.com"
+  }
+}
\ No newline at end of file
diff --git a/node_modules/.package-lock.json b/node_modules/.package-lock.json
index 96859aa..2606e5e 100644
--- a/node_modules/.package-lock.json
+++ b/node_modules/.package-lock.json
@@ -1011,6 +1011,15 @@
         "node": ">= 0.6"
       }
     },
+    "node_modules/node-cron": {
+      "version": "4.2.1",
+      "resolved": "https://registry.npmmirror.com/node-cron/-/node-cron-4.2.1.tgz",
+      "integrity": "sha512-lgimEHPE/QDgFlywTd8yTR61ptugX3Qer29efeyWw2rv259HtGBNn1vZVmp8lB9uo9wC0t/AT4iGqXxia+CJFg==",
+      "license": "ISC",
+      "engines": {
+        "node": ">=6.0.0"
+      }
+    },
     "node_modules/nodemailer": {
       "version": "7.0.11",
       "resolved": "https://registry.npmmirror.com/nodemailer/-/nodemailer-7.0.11.tgz",
diff --git a/package-lock.json b/package-lock.json
index a5988dd..7d35d59 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -14,6 +14,7 @@
         "docx": "^9.5.1",
         "express": "^5.2.1",
         "iconv-lite": "^0.6.3",
+        "node-cron": "^4.2.1",
         "nodemailer": "^7.0.11",
         "pdf-parse": "^2.4.5"
       }
@@ -1169,6 +1170,15 @@
         "node": ">= 0.6"
       }
     },
+    "node_modules/node-cron": {
+      "version": "4.2.1",
+      "resolved": "https://registry.npmmirror.com/node-cron/-/node-cron-4.2.1.tgz",
+      "integrity": "sha512-lgimEHPE/QDgFlywTd8yTR61ptugX3Qer29efeyWw2rv259HtGBNn1vZVmp8lB9uo9wC0t/AT4iGqXxia+CJFg==",
+      "license": "ISC",
+      "engines": {
+        "node": ">=6.0.0"
+      }
+    },
     "node_modules/nodemailer": {
       "version": "7.0.11",
       "resolved": "https://registry.npmmirror.com/nodemailer/-/nodemailer-7.0.11.tgz",
diff --git a/package.json b/package.json
index 9f895ed..8fd7ce1 100644
--- a/package.json
+++ b/package.json
@@ -14,6 +14,7 @@
     "docx": "^9.5.1",
     "express": "^5.2.1",
     "iconv-lite": "^0.6.3",
+    "node-cron": "^4.2.1",
     "nodemailer": "^7.0.11",
     "pdf-parse": "^2.4.5"
   }
diff --git a/public/app.js b/public/app.js
index 1d3733a..1b6041e 100644
--- a/public/app.js
+++ b/public/app.js
@@ -479,11 +479,6 @@ async function exportReport() {
 
 // ========== 邮件功能 ==========
 
-// 页面加载时加载邮件配置
-document.addEventListener('DOMContentLoaded', function() {
-    loadEmailConfig();
-});
-
 // 保存邮件配置到localStorage
 function saveEmailConfig() {
     const config = {
@@ -670,3 +665,344 @@ function showEmailStatus(message, type) {
         }, 3000);
     }
 }
+
+// ========== Scheduled task feature ==========
+
+// Cron presets offered by the schedule selector, mapped to the friendly label
+// shown in the status panel. Shared by cronToFriendlyText() and
+// loadSchedulerConfig() so the two cannot drift apart.
+const CRON_PRESET_LABELS = {
+    '0 9 * * *': '每天上午9点',
+    '0 6 * * *': '每天上午6点',
+    '0 12 * * *': '每天中午12点',
+    '0 18 * * *': '每天下午18点',
+    '0 9,18 * * *': '每天9点和18点',
+    '0 */6 * * *': '每6小时',
+    '0 */12 * * *': '每12小时',
+    '0 9 * * 1': '每周一上午9点',
+    '0 9 1 * *': '每月1日上午9点'
+};
+
+/**
+ * Tell whether a cron expression is one of the selector presets.
+ *
+ * @param {string} cronTime - Cron expression to look up.
+ * @returns {boolean} True when the expression is a preset.
+ */
+function isCronPreset(cronTime) {
+    return Object.prototype.hasOwnProperty.call(CRON_PRESET_LABELS, cronTime);
+}
+
+/**
+ * Convert a cron expression into a human-friendly schedule description.
+ *
+ * @param {string} cronTime - Five-field cron expression (minute hour day month weekday).
+ * @returns {string} The preset label, a daily description for
+ *     "minute hour * * *" expressions, the input itself when it cannot be
+ *     described, or an empty string when no expression is given.
+ */
+function cronToFriendlyText(cronTime) {
+    // A missing value must not break the status panel.
+    if (typeof cronTime !== 'string' || cronTime.trim() === '') {
+        return '';
+    }
+
+    const normalized = cronTime.trim();
+    if (isCronPreset(normalized)) {
+        return CRON_PRESET_LABELS[normalized];
+    }
+
+    // Custom daily schedule: "minute hour * * *".
+    const [minute, hour, ...rest] = normalized.split(/\s+/);
+    if (rest.length === 3 && rest.every((part) => part === '*')) {
+        return minute === '0' ? `每天${hour}点` : `每天${hour}点${minute}分`;
+    }
+
+    // Anything else is shown verbatim.
+    return cronTime;
+}
+
+/**
+ * Fetch the saved configuration from the server and fill in the scheduler
+ * form, then refresh the status panel. Shows an error banner on failure.
+ *
+ * @returns {Promise<void>}
+ */
+async function loadSchedulerConfig() {
+    try {
+        const response = await fetch(`${API_BASE}/config`);
+        const data = await response.json();
+
+        if (!data.success || !data.data) {
+            return;
+        }
+
+        const scheduler = data.data.scheduler;
+        if (scheduler) {
+            const cronTime = scheduler.cronTime || '0 9 * * *';
+            // Keep an explicit threshold of 0 instead of replacing it with the default.
+            const threshold = typeof scheduler.threshold === 'number' ? scheduler.threshold : 100000;
+
+            document.getElementById('schedulerEnabled').checked = Boolean(scheduler.enabled);
+            document.getElementById('schedulerCronInput').value = cronTime;
+            document.getElementById('schedulerThresholdInput').value = threshold;
+            document.getElementById('schedulerDescription').value = scheduler.description || '';
+            document.getElementById('schedulerTimeRange').value = scheduler.timeRange || 'thisMonth';
+
+            // Map the cron expression back onto the preset selector.
+            const presetSelector = document.getElementById('schedulerCronPreset');
+            const customGroup = document.getElementById('customCronGroup');
+
+            if (isCronPreset(cronTime)) {
+                presetSelector.value = cronTime;
+                customGroup.style.display = 'none';
+            } else {
+                // Custom schedule: reveal the hour/minute inputs and pre-fill
+                // them from the first two fields ("minute hour ...").
+                presetSelector.value = 'custom';
+                customGroup.style.display = 'block';
+
+                const cronParts = cronTime.split(/\s+/);
+                if (cronParts.length >= 2) {
+                    document.getElementById('customMinute').value = cronParts[0];
+                    document.getElementById('customHour').value = cronParts[1];
+                }
+            }
+        }
+
+        await updateSchedulerStatus();
+    } catch (error) {
+        console.error('加载定时任务配置失败:', error);
+        showSchedulerStatus('加载配置失败: ' + error.message, 'error');
+    }
+}
+
+/**
+ * React to a change of the preset selector: show the custom time inputs for
+ * "custom", otherwise hide them and use the preset expression.
+ */
+function handleCronPresetChange() {
+    const preset = document.getElementById('schedulerCronPreset').value;
+    const customGroup = document.getElementById('customCronGroup');
+    const cronInput = document.getElementById('schedulerCronInput');
+
+    if (preset === 'custom') {
+        customGroup.style.display = 'block';
+        // Build the expression from the current hour/minute inputs.
+        updateCustomCron();
+    } else {
+        customGroup.style.display = 'none';
+        cronInput.value = preset;
+    }
+}
+
+/**
+ * Write a daily cron expression ("minute hour * * *") built from the custom
+ * hour and minute inputs into the cron field.
+ */
+function updateCustomCron() {
+    const hour = document.getElementById('customHour').value;
+    const minute = document.getElementById('customMinute').value;
+    const cronInput = document.getElementById('schedulerCronInput');
+
+    cronInput.value = `${minute} ${hour} * * *`;
+}
+
+document.addEventListener('DOMContentLoaded', function() {
+    loadEmailConfig();
+    loadSchedulerConfig();
+
+    // Regenerate the cron expression whenever the custom time inputs change.
+    const customHour = document.getElementById('customHour');
+    const customMinute = document.getElementById('customMinute');
+    if (customHour) {
+        customHour.addEventListener('change', updateCustomCron);
+    }
+    if (customMinute) {
+        customMinute.addEventListener('change', updateCustomCron);
+    }
+});
+
+/**
+ * Refresh the status panel (running state, schedule, threshold) from the
+ * server. Failures are logged and leave the panel unchanged.
+ *
+ * @returns {Promise<void>}
+ */
+async function updateSchedulerStatus() {
+    try {
+        const response = await fetch(`${API_BASE}/scheduler/status`);
+        const data = await response.json();
+
+        if (!data.success || !data.data) {
+            return;
+        }
+        const status = data.data;
+
+        // Running state as colored text, built with DOM APIs instead of an
+        // HTML string.
+        const statusLabel = document.createElement('span');
+        statusLabel.style.color = status.isRunning ? '#28a745' : '#dc3545';
+        statusLabel.textContent = status.isRunning ? '✓ 运行中' : '✗ 未运行';
+        document.getElementById('schedulerRunningStatus').replaceChildren(statusLabel);
+
+        if (status.config) {
+            document.getElementById('schedulerCronTime').textContent = cronToFriendlyText(status.config.cronTime);
+            // The threshold is in units of 10,000 yuan; 10,000 of them make 100 million.
+            const thresholdBillion = (status.config.threshold / 10000).toFixed(1);
+            document.getElementById('schedulerThreshold').textContent = `${status.config.threshold}万元 (${thresholdBillion}亿)`;
+        }
+    } catch (error) {
+        console.error('获取定时任务状态失败:', error);
+    }
+}
+
+/**
+ * Validate the scheduler form and save it to the server.
+ *
+ * The email section is taken from the copy kept in localStorage and is only
+ * sent when it names an SMTP host and user.
+ *
+ * @returns {Promise<void>}
+ */
+async function saveSchedulerConfig() {
+    const schedulerConfig = {
+        enabled: document.getElementById('schedulerEnabled').checked,
+        cronTime: document.getElementById('schedulerCronInput').value.trim(),
+        threshold: Number.parseInt(document.getElementById('schedulerThresholdInput').value, 10),
+        description: document.getElementById('schedulerDescription').value,
+        timeRange: document.getElementById('schedulerTimeRange').value
+    };
+
+    // Simple cron validation: exactly five whitespace-separated fields.
+    if (schedulerConfig.cronTime.split(/\s+/).length !== 5) {
+        showSchedulerStatus('Cron表达式格式错误,应为5个部分(分 时 日 月 周)', 'error');
+        return;
+    }
+
+    // An empty or non-numeric input parses to NaN, which JSON.stringify would
+    // send as null; reject it instead of saving a meaningless threshold.
+    if (Number.isNaN(schedulerConfig.threshold) || schedulerConfig.threshold < 0) {
+        showSchedulerStatus('金额阈值必须是不小于0的整数(单位: 万元)', 'error');
+        return;
+    }
+
+    // Email settings live in localStorage (written by the email section).
+    let emailConfig = {};
+    const emailConfigStr = localStorage.getItem('emailConfig');
+    if (emailConfigStr) {
+        try {
+            emailConfig = JSON.parse(emailConfigStr) || {};
+        } catch (e) {
+            console.error('解析邮件配置失败:', e);
+        }
+    }
+
+    const hasEmailConfig = Boolean(emailConfig.smtpHost && emailConfig.smtpUser);
+    if (!hasEmailConfig && !confirm('检测到邮件配置未完成,定时任务需要邮件配置才能发送报告。\n\n是否继续保存定时任务配置(不保存邮件配置)?')) {
+        return;
+    }
+
+    // The prompt above promises not to save the email configuration, so an
+    // incomplete local copy is left out of the request entirely.
+    const fullConfig = { scheduler: schedulerConfig };
+    if (hasEmailConfig) {
+        fullConfig.email = emailConfig;
+    }
+
+    showSchedulerStatus('正在保存配置...', 'info');
+
+    try {
+        const response = await fetch(`${API_BASE}/config`, {
+            method: 'POST',
+            headers: { 'Content-Type': 'application/json' },
+            body: JSON.stringify(fullConfig)
+        });
+
+        const data = await response.json();
+
+        if (data.success) {
+            showSchedulerStatus('配置已保存,定时任务已重新加载!', 'success');
+            await updateSchedulerStatus();
+        } else {
+            showSchedulerStatus(`保存失败: ${data.error}`, 'error');
+        }
+    } catch (error) {
+        showSchedulerStatus(`请求失败: ${error.message}`, 'error');
+    }
+}
+
+/**
+ * Ask for confirmation, then trigger an immediate run of the scheduled task
+ * on the server and report whether the request was accepted.
+ *
+ * @returns {Promise<void>}
+ */
+async function testSchedulerNow() {
+    if (!confirm('确定要立即执行定时任务吗?\n\n这将采集本月大于阈值的项目并发送邮件,可能需要几分钟时间。')) {
+        return;
+    }
+
+    showSchedulerStatus('正在后台执行定时任务,请稍候...', 'info');
+
+    try {
+        const response = await fetch(`${API_BASE}/run-scheduled-task`, {
+            method: 'POST',
+            headers: { 'Content-Type': 'application/json' }
+        });
+
+        const data = await response.json();
+
+        if (data.success) {
+            showSchedulerStatus('定时任务已在后台开始执行,完成后将发送邮件。请查看服务器控制台日志了解进度。', 'success');
+        } else {
+            showSchedulerStatus(`执行失败: ${data.error}`, 'error');
+        }
+    } catch (error) {
+        showSchedulerStatus(`请求失败: ${error.message}`, 'error');
+    }
+}
+
+/**
+ * Show a status banner in the scheduler section.
+ *
+ * @param {string} message - Text to display; inserted as plain text.
+ * @param {string} type - 'success', 'error' or 'info'; selects the colors.
+ *     Success banners are removed after 3 seconds.
+ */
+function showSchedulerStatus(message, type) {
+    const statusDiv = document.getElementById('schedulerConfigStatus');
+    if (!statusDiv) {
+        return;
+    }
+
+    const bgColors = {
+        success: '#d4edda',
+        error: '#f8d7da',
+        info: '#d1ecf1'
+    };
+    const textColors = {
+        success: '#155724',
+        error: '#721c24',
+        info: '#0c5460'
+    };
+
+    // `message` can carry server or exception text, so it is set through
+    // textContent and never parsed as HTML.
+    const banner = document.createElement('div');
+    banner.style.padding = '12px';
+    banner.style.borderRadius = '4px';
+    banner.style.background = bgColors[type] || bgColors.info;
+    banner.style.color = textColors[type] || textColors.info;
+    banner.textContent = message;
+    statusDiv.replaceChildren(banner);
+
+    if (type === 'success') {
+        setTimeout(() => {
+            // Only clear the banner if no newer message has replaced it.
+            if (statusDiv.firstChild === banner) {
+                statusDiv.replaceChildren();
+            }
+        }, 3000);
+    }
+}
diff --git a/public/index.html b/public/index.html
index 3ff1b9c..2be93f4 100644
--- a/public/index.html
+++ b/public/index.html
@@ -342,6 +342,7 @@
+
@@ -470,6 +471,118 @@
+ +
+

定时任务配置

+

配置定时任务自动采集本月大于指定金额的项目并发送邮件报告

+ + +
+

任务状态

+
+
+
运行状态
+
加载中...
+
+
+
执行时间
+
-
+
+
+
金额阈值
+
-
+
+
+
+ + +
+
+ + +
+
+ +
+ + +
+ + + + + + + +
+ + + + 今日:今天 | 本周:本周一至今 | 本月:本月1日至今 + +
+ +
+ + + + 10亿 = 100000万元 | 5亿 = 50000万元 | 1亿 = 10000万元 + +
+ +
+ + +
+ + + + + +
+ +
+

使用说明

+ +
+
+

邮件配置

diff --git a/src/scheduler.js b/src/scheduler.js
new file mode 100644
index 0000000..4005e7d
--- /dev/null
+++ b/src/scheduler.js
@@ -0,0 +1,774 @@
+import cron from 'node-cron';
+import { readFileSync } from 'fs';
+import { fileURLToPath } from 'url';
+import { dirname, join } from 'path';
+import axios from 'axios';
+import * as cheerio from 'cheerio';
+import iconv from 'iconv-lite';
+import { sendReportEmail } from './emailService.js';
+
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = dirname(__filename);
+
+// Time zone the cron schedule runs in. Date ranges are computed in the same
+// zone so that "today" is the same calendar day for both.
+const SCHEDULER_TIMEZONE = 'Asia/Shanghai';
+
+/**
+ * Load and parse config.json from the project root.
+ *
+ * @returns {object|null} The parsed configuration, or null (after logging)
+ *   when the file cannot be read or is not valid JSON.
+ */
+function loadConfig() {
+  try {
+    const configPath = join(__dirname, '..', 'config.json');
+    const configContent = readFileSync(configPath, 'utf-8');
+    return JSON.parse(configContent);
+  } catch (error) {
+    console.error('加载配置文件失败:', error.message);
+    console.error('请确保 config.json 文件存在并配置正确');
+    return null;
+  }
+}
+
+/**
+ * Compute the start and end dates for a named time range.
+ *
+ * The calendar date is taken in SCHEDULER_TIMEZONE rather than the host's
+ * local zone, so a job fired by the cron schedule always sees the day it was
+ * scheduled for.
+ *
+ * @param {string} timeRange - 'today', 'thisWeek' (Monday to today) or
+ *   'thisMonth'; any other value is treated as 'thisMonth'.
+ * @param {Date} [now] - Reference instant, defaults to the current time.
+ * @returns {{startDate: string, endDate: string}} Dates as YYYY-MM-DD; the
+ *   end date is always the current day.
+ */
+function getDateRangeByType(timeRange, now = new Date()) {
+  const parts = new Intl.DateTimeFormat('en-CA', {
+    timeZone: SCHEDULER_TIMEZONE,
+    year: 'numeric',
+    month: '2-digit',
+    day: '2-digit',
+  }).formatToParts(now);
+  const field = (type) => Number(parts.find((part) => part.type === type).value);
+
+  // Hold the zoned calendar date in UTC fields so the arithmetic below does
+  // not depend on the host time zone.
+  const today = new Date(Date.UTC(field('year'), field('month') - 1, field('day')));
+  const toDateString = (date) => date.toISOString().slice(0, 10);
+  const endDate = toDateString(today);
+
+  let startDate;
+  switch (timeRange) {
+    case 'today':
+      startDate = endDate;
+      break;
+
+    case 'thisWeek': {
+      // getUTCDay(): 0 is Sunday, 1 is Monday. Weeks start on Monday.
+      const dayOfWeek = today.getUTCDay();
+      const daysSinceMonday = dayOfWeek === 0 ? 6 : dayOfWeek - 1;
+      const monday = new Date(today);
+      monday.setUTCDate(today.getUTCDate() - daysSinceMonday);
+      startDate = toDateString(monday);
+      break;
+    }
+
+    case 'thisMonth':
+    default:
+      startDate = `${endDate.slice(0, 8)}01`;
+      break;
+  }
+
+  return { startDate, endDate };
+}
+
+/**
+ * Start and end dates of the current month (kept for older callers).
+ *
+ * @returns {{startDate: string, endDate: string}}
+ */
+function getCurrentMonthDateRange() {
+  return getDateRangeByType('thisMonth');
+}
+
+// Helper functions copied from server.js.
+const BASE_URL = 'https://gjzx.nanjing.gov.cn/gggs/';
+
+const http = axios.create({
+  responseType: 'arraybuffer',
+  timeout: 10000,
+  headers: {
+    'User-Agent': 'Mozilla/5.0 (compatible; gjzx-scraper/1.0)',
+  },
+});
+
+/**
+ * Choose the decoder name for a response from its Content-Type header.
+ *
+ * @param {string} [contentType] - Value of the Content-Type header.
+ * @returns {string} 'gbk' for any GB* charset, the declared charset when
+ *   iconv-lite supports it, otherwise 'utf-8'.
+ */
+function pickEncoding(contentType = '') {
+  const match = /charset=([^;]+)/i.exec(contentType);
+  if (!match) return 'utf-8';
+  // The charset may be quoted, e.g. charset="utf-8".
+  const charset = match[1].trim().replace(/^["']|["']$/g, '').toLowerCase();
+  if (charset.includes('gb')) return 'gbk';
+  // An unknown name would make iconv.decode throw; fall back to UTF-8.
+  return iconv.encodingExists(charset) ? charset : 'utf-8';
+}
+
+/**
+ * Download a page and decode it with the charset announced by the server.
+ *
+ * @param {string} url - Page URL.
+ * @returns {Promise<string>} The decoded HTML.
+ */
+async function fetchHtml(url) {
+  const res = await http.get(url);
+  const encoding = pickEncoding(res.headers['content-type']);
+  return iconv.decode(res.data, encoding);
+}
+
+/**
+ * Build the URL of a list page.
+ *
+ * @param {number} pageIndex - Zero-based page index.
+ * @param {string} [baseUrl] - List root; page 0 is the root itself.
+ * @returns {string} The page URL (index_N.html for N > 0).
+ */
+function getPageUrl(pageIndex, baseUrl = BASE_URL) {
+  if (pageIndex === 0) {
+    return baseUrl;
+  }
+  const cleanBaseUrl = baseUrl.replace(/\/$/, '');
+  return `${cleanBaseUrl}/index_${pageIndex}.html`;
+}
+
+/**
+ * Extract announcements from a list page.
+ *
+ * A table row counts when its first cell holds a link and its second cell a
+ * YYYY-MM-DD date.
+ *
+ * @param {string} html - List page HTML.
+ * @returns {Array<{title: string, href: string, date: string}>} Entries with
+ *   absolute URLs.
+ */
+function parseList(html) {
+  const $ = cheerio.load(html);
+  const items = [];
+
+  $('table tr').each((_, row) => {
+    const $row = $(row);
+    const link = $row.find('td:first-child a').first();
+    const dateCell = $row.find('td:nth-child(2)');
+    if (!link.length || !dateCell.length) return;
+
+    const title = link.attr('title') || link.text().trim();
+    const rawHref = link.attr('href') || '';
+    const dateText = dateCell.text().trim();
+
+    // Skip navigation links, very short titles and rows without a date.
+    if (!rawHref || !title || title.length < 5) return;
+    if (rawHref === './' || rawHref === '../') return;
+    if (!/^\d{4}-\d{2}-\d{2}$/.test(dateText)) return;
+
+    try {
+      const href = new URL(rawHref, BASE_URL).toString();
+      items.push({ title, href, date: dateText });
+    } catch (err) {
+      // Unparseable link: ignore this row.
+    }
+  });
+
+  return items;
+}
+
+/**
+ * Check whether a date lies within an inclusive range.
+ *
+ * @param {string} dateStr - Date to test (YYYY-MM-DD).
+ * @param {string} [startDate] - Lower bound; falsy means unbounded.
+ * @param {string} [endDate] - Upper bound; falsy means unbounded.
+ * @returns {boolean} False for a missing or unparseable date.
+ */
+function isDateInRange(dateStr, startDate, endDate) {
+  if (!dateStr) return false;
+  const date = new Date(dateStr);
+  if (Number.isNaN(date.getTime())) return false;
+
+  if (startDate && date < new Date(startDate)) return false;
+  if (endDate && date > new Date(endDate)) return false;
+  return true;
+}
+
+/**
+ * Walk the list pages and collect the announcements dated within a range.
+ *
+ * Stops at the first empty page, the first page whose entries are all older
+ * than startDate, the first failed request, or after maxPages pages.
+ *
+ * @param {string} startDate - Inclusive lower bound (YYYY-MM-DD) or falsy.
+ * @param {string} endDate - Inclusive upper bound (YYYY-MM-DD) or falsy.
+ * @param {number} [maxPages=23] - Maximum number of pages to request.
+ * @returns {Promise<Array<{title: string, href: string, date: string}>>}
+ */
+async function fetchListByDateRange(startDate, endDate, maxPages = 23) {
+  const allItems = [];
+  let shouldContinue = true;
+  let pageIndex = 0;
+
+  console.log(`开始按时间范围采集: ${startDate || '不限'} 至 ${endDate || '不限'}`);
+
+  while (shouldContinue && pageIndex < maxPages) {
+    const pageUrl = getPageUrl(pageIndex);
+    console.log(`正在采集第 ${pageIndex + 1} 页: ${pageUrl}`);
+
+    try {
+      const html = await fetchHtml(pageUrl);
+      const items = parseList(html);
+
+      if (items.length === 0) {
+        console.log(`第 ${pageIndex + 1} 页没有数据,停止采集`);
+        break;
+      }
+
+      const inRange = items.filter((item) => isDateInRange(item.date, startDate, endDate));
+      allItems.push(...inRange);
+
+      // Once a whole page predates the range, later pages have nothing to
+      // offer (assumes the list is ordered newest first).
+      const allBeforeRange = Boolean(startDate)
+        && items.every((item) => new Date(item.date) < new Date(startDate));
+      if (allBeforeRange) {
+        console.log(`第 ${pageIndex + 1} 页所有项目都早于起始日期,停止采集`);
+        shouldContinue = false;
+      }
+
+      console.log(`第 ${pageIndex + 1} 页找到 ${items.length} 条,符合条件 ${inRange.length > 0 ? '有' : '无'}`);
+
+      pageIndex++;
+
+      // Pause between pages to go easy on the remote server.
+      if (shouldContinue && pageIndex < maxPages) {
+        await new Promise((resolve) => setTimeout(resolve, 500));
+      }
+    } catch (err) {
+      console.error(`采集第 ${pageIndex + 1} 页失败: ${err.message}`);
+      break;
+    }
+  }
+
+  console.log(`总共采集了 ${pageIndex} 页,找到 ${allItems.length} 条符合条件的公告`);
+  return allItems;
+}
+
+/**
+ * Parse an announcement detail page.
+ *
+ * @param {string} html - Detail page HTML.
+ * @returns {{title: string, publishTime: string, content: string, budget: object|null}}
+ *   Empty strings for parts that cannot be found; budget as returned by
+ *   extractBudget.
+ */
+function parseDetail(html) {
+  const $ = cheerio.load(html);
+
+  // Title: try the known containers in turn.
+  let title = $('.title18').text().trim();
+  if (!title) {
+    title = $('.article-info h1').text().trim();
+  }
+  if (!title) {
+    title = $('h1').first().text().trim();
+  }
+
+  // Publish time: prefer the cell that names both department and time.
+  const publishTd = $('td:contains("发布部门")').filter((_, el) => {
+    return $(el).text().includes('发布时间');
+  });
+  const publishText = publishTd.text().trim();
+  let timeMatch = publishText.match(/(\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2})/);
+  let publishTime = timeMatch ? timeMatch[1] : '';
+
+  if (!publishTime) {
+    const infoText = $('.info-sources').text() || $('body').text();
+    timeMatch = infoText.match(/(\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2})/);
+    publishTime = timeMatch ? timeMatch[1] : '';
+  }
+
+  // Content: keep the longest text among the candidate containers.
+  let content = '';
+  const contentSelectors = [
+    '.zhenwen td',
+    '.con',
+    '.article-content',
+    '.ewb-article-content',
+    'body'
+  ];
+
+  for (const selector of contentSelectors) {
+    const el = $(selector).first();
+    if (el.length > 0) {
+      const text = el.text().trim();
+      if (text.length > content.length) {
+        content = text;
+      }
+    }
+  }
+
+  const budget = extractBudget(content);
+
+  return {
+    title,
+    publishTime,
+    content,
+    budget,
+  };
+}
+
+// Budget patterns in priority order. `divider` converts an amount given in
+// yuan to units of 10,000 yuan. Currency signs, thousands separators and
+// parentheses are accepted in both their half-width and full-width forms.
+const BUDGET_PATTERNS = [
+  { regex: /(?:[¥\uFFE5]|人民币)\s*([\d,\uFF0C]+(?:\.\d+)?)\s*万元/ },
+  { regex: /[(\uFF08][¥\uFFE5]([\d,\uFF0C]+(?:\.\d+)?)[)\uFF09]/, divider: 10000 },
+  { regex: /([\d,\uFF0C]+(?:\.\d+)?)\s*万元/ },
+  { regex: /(?:[¥\uFFE5]|人民币)\s*([\d,\uFF0C]+(?:\.\d+)?)\s*元/, divider: 10000 },
+  { regex: /([\d,\uFF0C]+(?:\.\d+)?)\s*元(?!整)/, divider: 10000 },
+];
+
+/**
+ * Find the budget amount mentioned in a text.
+ *
+ * @param {string} content - Announcement text.
+ * @returns {{amount: number, unit: string, text: string, originalUnit: string}|null}
+ *   The first plausible amount in pattern priority order, expressed in units
+ *   of 10,000 yuan, or null when none is found.
+ */
+function extractBudget(content) {
+  if (!content) return null;
+
+  // Rejoin numbers that were split across lines.
+  const cleanedContent = String(content).replace(/(\d)\s*[\n\r]\s*(?=\d)/g, '$1');
+
+  for (const pattern of BUDGET_PATTERNS) {
+    const match = cleanedContent.match(pattern.regex);
+    if (!match) continue;
+
+    let amount = parseFloat(match[1].replace(/[,\uFF0C]/g, ''));
+    if (pattern.divider) {
+      amount = amount / pattern.divider;
+    }
+
+    // Discard values outside the plausible range and try the next pattern.
+    if (!Number.isNaN(amount) && amount >= 0.01 && amount <= 100000000) {
+      return {
+        amount,
+        unit: '万元',
+        text: match[0],
+        originalUnit: pattern.divider ? '元' : '万元',
+      };
+    }
+  }
+
+  return null;
+}
+
+/**
+ * Ask the jszbtb.com bulletin API for the signed PDF of an announcement.
+ *
+ * @param {string} pageUrl - Detail page URL containing bulletinDetails/<segment>/<hex id>.
+ * @returns {Promise<string|null>} The PDF URL, or null when the URL does not
+ *   match, the API reports no PDF, or the request fails (failure is logged).
+ */
+async function fetchPdfUrlFromApi(pageUrl) {
+  try {
+    const bulletinIdMatch = pageUrl.match(/bulletinDetails\/[^\/]+\/([a-f0-9]+)/i);
+    const bulletinTypeMatch = pageUrl.match(/bulletinType=(\d+)/);
+
+    if (!bulletinIdMatch) {
+      return null;
+    }
+
+    const bulletinId = bulletinIdMatch[1];
+    const bulletinType = bulletinTypeMatch ? bulletinTypeMatch[1] : '1';
+
+    const apiUrl = `https://api.jszbtb.com/DataGatewayApi/PublishBulletin/BulletinType/${bulletinType}/ID/${bulletinId}`;
+
+    const response = await http.get(apiUrl, {
+      headers: {
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
+        'Accept': 'application/json',
+        'Referer': 'https://www.jszbcg.com/'
+      },
+      responseType: 'arraybuffer'
+    });
+
+    const responseText = iconv.decode(response.data, 'utf-8');
+    const data = JSON.parse(responseText);
+
+    if (data.success && data.data && data.data.signedPdfUrl) {
+      return data.data.signedPdfUrl;
+    }
+
+    return null;
+  } catch (err) {
+    // Best effort: the caller falls back to the page HTML. Leave a trace
+    // instead of failing silently.
+    console.warn(`获取PDF地址失败: ${err.message}`);
+    return null;
+  }
+}
+
+/**
+ * Find the PDF referenced by an embedded viewer iframe.
+ *
+ * @param {string} html - Detail page HTML.
+ * @param {string} pageUrl - Page URL, used to resolve relative PDF paths.
+ * @returns {string|null} PDF URL taken from the `file` query parameter of the
+ *   first iframe that has one, or null.
+ */
+function extractPdfUrl(html, pageUrl) {
+  const $ = cheerio.load(html);
+
+  for (const el of $('iframe').toArray()) {
+    const src = $(el).attr('src');
+    const match = src ? src.match(/[?&]file=([^&]+)/) : null;
+    if (!match) continue;
+
+    try {
+      // Both decoding and URL resolution throw on malformed input.
+      const pdfUrl = decodeURIComponent(match[1]);
+      if (pdfUrl.startsWith('http://') || pdfUrl.startsWith('https://')) {
+        return pdfUrl;
+      }
+      return new URL(pdfUrl, pageUrl).toString();
+    } catch (err) {
+      return null;
+    }
+  }
+
+  return null;
+}
+
+/**
+ * Download a PDF and extract its text.
+ *
+ * @param {string} pdfUrl - PDF URL.
+ * @returns {Promise<string>} The extracted text.
+ * @throws When the download or the text extraction fails.
+ */
+async function fetchPdfContent(pdfUrl) {
+  const { PDFParse } = await import('pdf-parse');
+
+  const response = await http.get(pdfUrl, {
+    responseType: 'arraybuffer',
+    timeout: 30000,
+  });
+
+  const parser = new PDFParse({ data: response.data });
+  try {
+    const result = await parser.getText();
+    return result.text;
+  } finally {
+    // Release the parser even when text extraction throws.
+    await parser.destroy();
+  }
+}
+
+/**
+ * Parse a detail page, preferring the text of a linked PDF for the content
+ * and budget.
+ *
+ * @param {string} html - Detail page HTML.
+ * @param {string} pageUrl - Detail page URL.
+ * @returns {Promise<object>} The parseDetail fields plus hasPdf and pdfUrl
+ *   (pdfUrl is null unless the PDF text was used).
+ */
+async function parseDetailEnhanced(html, pageUrl) {
+  const basicInfo = parseDetail(html);
+
+  // Prefer the PDF link from the bulletin API (jszbcg.com pages), then an embedded viewer.
+  let pdfUrl = null;
+  if (pageUrl.includes('jszbcg.com')) {
+    pdfUrl = await fetchPdfUrlFromApi(pageUrl);
+  }
+  if (!pdfUrl) {
+    pdfUrl = extractPdfUrl(html, pageUrl);
+  }
+
+  let content = basicInfo.content;
+  let pdfParsed = false;
+
+  if (pdfUrl) {
+    try {
+      content = await fetchPdfContent(pdfUrl);
+      pdfParsed = true;
+    } catch (err) {
+      // Unreadable PDF: fall back to the text of the page itself.
+      console.warn(`PDF解析失败,改用网页内容: ${err.message}`);
+      content = basicInfo.content;
+    }
+  }
+
+  return {
+    ...basicInfo,
+    content,
+    budget: pdfParsed ? extractBudget(content) : basicInfo.budget,
+    hasPdf: pdfParsed,
+    pdfUrl: pdfParsed ? pdfUrl : null,
+  };
+}
+
+/**
+ * Run one collection cycle: gather the announcements of the configured time
+ * range, keep those whose budget exceeds the threshold and email a report.
+ *
+ * Failures are logged, never thrown.
+ *
+ * @param {object} config - Parsed config.json (scheduler and email sections).
+ * @returns {Promise<void>}
+ */
+async function executeScheduledTask(config) {
+  try {
+    console.log('========================================');
+    console.log('定时任务开始执行');
+    console.log('执行时间:', new Date().toLocaleString('zh-CN'));
+    console.log('========================================');
+
+    // Check the email settings before collecting anything: the report could
+    // not be delivered without them.
+    const emailConfig = config.email;
+    if (!emailConfig || !emailConfig.smtpHost || !emailConfig.smtpUser) {
+      throw new Error('邮件配置不完整');
+    }
+
+    const schedulerConfig = config.scheduler ?? {};
+    const timeRange = schedulerConfig.timeRange || 'thisMonth';
+    const { startDate, endDate } = getDateRangeByType(timeRange);
+    // Default 100000 (unit: 10,000 yuan); ?? keeps an explicit threshold of 0.
+    const threshold = schedulerConfig.threshold ?? 100000;
+
+    const timeRangeNames = {
+      'today': '今日',
+      'thisWeek': '本周',
+      'thisMonth': '本月'
+    };
+    const rangeLabel = timeRangeNames[timeRange] || '本月';
+    console.log(`采集时间段: ${rangeLabel}`);
+    console.log(`采集时间范围: ${startDate} 至 ${endDate}`);
+    console.log(`金额阈值: ${threshold}万元 (${threshold / 10000}亿元)`);
+
+    // Collect the list.
+    const items = await fetchListByDateRange(startDate, endDate, 23);
+
+    if (items.length === 0) {
+      console.log(`${rangeLabel}暂无公告数据`);
+      return;
+    }
+
+    // Collect the details one by one.
+    console.log('========================================');
+    console.log(`开始采集 ${items.length} 条公告的详情...`);
+    const results = [];
+    for (let i = 0; i < items.length; i++) {
+      const item = items[i];
+      try {
+        console.log(`[${i + 1}/${items.length}] 正在采集: ${item.title}`);
+        const html = await fetchHtml(item.href);
+        const detail = await parseDetailEnhanced(html, item.href);
+        results.push({
+          ...item,
+          detail,
+        });
+        await new Promise((resolve) => setTimeout(resolve, 500));
+      } catch (err) {
+        console.error(`采集失败: ${err.message}`);
+        results.push({
+          ...item,
+          detail: null,
+          error: err.message,
+        });
+      }
+    }
+
+    // Keep the projects above the threshold.
+    const filtered = results.filter((item) => {
+      return item.detail?.budget && item.detail.budget.amount > threshold;
+    });
+
+    console.log('========================================');
+    console.log(`筛选结果: 找到 ${filtered.length} 个大于 ${threshold}万元 的项目`);
+
+    if (filtered.length === 0) {
+      console.log(`${rangeLabel}暂无符合条件的大额项目`);
+      return;
+    }
+
+    const total = filtered.reduce(
+      (sum, item) => sum + (item.detail.budget?.amount || 0),
+      0
+    );
+
+    const report = {
+      summary: {
+        total_count: results.length,
+        filtered_count: filtered.length,
+        threshold: `${threshold}万元`,
+        total_amount: `${total.toFixed(2)}万元`,
+        generated_at: new Date().toISOString(),
+        date_range: { startDate, endDate },
+      },
+      projects: filtered.map((item) => ({
+        title: item.title,
+        date: item.date,
+        publish_time: item.detail.publishTime,
+        budget: item.detail.budget,
+        url: item.href,
+      })),
+    };
+
+    console.log('========================================');
+    console.log('正在发送邮件报告...');
+    const result = await sendReportEmail(emailConfig, report);
+
+    console.log('邮件发送成功!');
+    console.log('收件人:', emailConfig.recipients);
+    console.log('MessageId:', result.messageId);
+    console.log('========================================');
+    console.log('定时任务执行完成');
+    console.log('========================================');
+  } catch (error) {
+    console.error('========================================');
+    console.error('定时任务执行失败:', error.message);
+    console.error(error.stack);
+    console.error('========================================');
+  }
+}
+
+// The cron task currently scheduled, or null when none is.
+let currentScheduledTask = null;
+
+/**
+ * Start the scheduled task described by config.json, replacing any task that
+ * is already scheduled.
+ *
+ * Logs the reason and schedules nothing when the configuration cannot be
+ * loaded, the scheduler is disabled, the email settings are incomplete or the
+ * cron expression is invalid.
+ */
+export function initScheduler() {
+  const config = loadConfig();
+
+  if (!config) {
+    console.error('无法启动定时任务: 配置文件加载失败');
+    return;
+  }
+
+  if (!config.scheduler || !config.scheduler.enabled) {
+    console.log('定时任务已禁用');
+    return;
+  }
+
+  if (!config.email || !config.email.smtpHost || !config.email.smtpUser) {
+    console.error('无法启动定时任务: 邮件配置不完整');
+    console.error('请在 config.json 中配置邮件信息');
+    return;
+  }
+
+  const cronTime = config.scheduler.cronTime || '0 9 * * *';
+  // Validate up front so a malformed expression is reported clearly and
+  // nothing is scheduled.
+  if (!cron.validate(cronTime)) {
+    console.error(`无法启动定时任务: Cron表达式无效 (${cronTime})`);
+    return;
+  }
+
+  // Replace any task that is still scheduled.
+  if (currentScheduledTask) {
+    currentScheduledTask.stop();
+    console.log('已停止旧的定时任务');
+  }
+
+  currentScheduledTask = cron.schedule(cronTime, () => {
+    executeScheduledTask(config);
+  }, {
+    timezone: SCHEDULER_TIMEZONE
+  });
+
+  console.log('========================================');
+  console.log('定时任务已启动');
+  console.log('执行计划:', cronTime);
+  console.log('金额阈值:', config.scheduler.threshold, '万元');
+  console.log('收件人:', config.email.recipients);
+  console.log('========================================');
+}
+
+/**
+ * Reload config.json and restart the scheduled task with the new settings.
+ */
+export function reloadScheduler() {
+  console.log('重新加载定时任务配置...');
+
+  if (currentScheduledTask) {
+    currentScheduledTask.stop();
+    currentScheduledTask = null;
+    console.log('已停止当前定时任务');
+  }
+
+  initScheduler();
+}
+
+/**
+ * Stop the scheduled task.
+ *
+ * @returns {boolean} True when a task was stopped, false when none was scheduled.
+ */
+export function stopScheduler() {
+  if (currentScheduledTask) {
+    currentScheduledTask.stop();
+    currentScheduledTask = null;
+    console.log('定时任务已停止');
+    return true;
+  }
+  return false;
+}
+
+/**
+ * Report whether a task is scheduled together with the saved scheduler settings.
+ *
+ * @returns {{isRunning: boolean, config: ({enabled: boolean, cronTime: string, threshold: number}|null)}}
+ *   config is null when config.json cannot be loaded.
+ */
+export function getSchedulerStatus() {
+  const config = loadConfig();
+  return {
+    isRunning: currentScheduledTask !== null,
+    config: config ? {
+      enabled: config.scheduler?.enabled || false,
+      cronTime: config.scheduler?.cronTime || '0 9 * * *',
+      // ?? keeps an explicit threshold of 0.
+      threshold: config.scheduler?.threshold ?? 100000,
+    } : null,
+  };
+}
+
+/**
+ * Run the scheduled task once, immediately (used for manual testing).
+ *
+ * @returns {Promise<void>}
+ * @throws {Error} When config.json cannot be loaded.
+ */
+export async function runTaskNow() {
+  const config = loadConfig();
+  if (!config) {
+    throw new Error('配置文件加载失败');
+  }
+  await executeScheduledTask(config);
+}
diff --git a/src/server.js b/src/server.js
index 2afd464..d27416d 100644
--- a/src/server.js
+++ b/src/server.js
@@ -4,6 +4,7 @@ import axios from 'axios';
 import * as cheerio from 'cheerio';
 import iconv from 'iconv-lite';
 import { sendReportEmail } from './emailService.js';
+import { initScheduler, runTaskNow, reloadScheduler, getSchedulerStatus } from './scheduler.js';
 
 const app = express();
 const PORT = 3000;
@@ -732,6 +733,131 @@ app.post('/api/test-pdf', async (req, res) => {
   }
 });
 
+// Placeholder sent to the browser in place of the stored SMTP password.
+const SMTP_PASSWORD_PLACEHOLDER = '***已配置***';
+// config.json lives in the project root, one directory above this file.
+const CONFIG_FILE_URL = new URL('../config.json', import.meta.url);
+
+/**
+ * Tell whether a value is a non-null, non-array object.
+ *
+ * @param {*} value - Value to test.
+ * @returns {boolean}
+ */
+function isPlainObject(value) {
+  return typeof value === 'object' && value !== null && !Array.isArray(value);
+}
+
+/**
+ * Read and parse config.json.
+ *
+ * @returns {Promise<object>} The parsed configuration.
+ * @throws When the file cannot be read or is not valid JSON.
+ */
+async function readConfigFile() {
+  const { readFile } = await import('fs/promises');
+  return JSON.parse(await readFile(CONFIG_FILE_URL, 'utf-8'));
+}
+
+// NOTE(review): the configuration routes below are not authenticated; anyone
+// who can reach the server can read and rewrite config.json.
+
+// Return the configuration with the SMTP password masked.
+app.get('/api/config', async (req, res) => {
+  try {
+    const config = await readConfigFile();
+
+    if (config.email && config.email.smtpPass) {
+      config.email.smtpPass = SMTP_PASSWORD_PLACEHOLDER;
+    }
+
+    res.json({ success: true, data: config });
+  } catch (error) {
+    res.status(500).json({ success: false, error: error.message });
+  }
+});
+
+// Save the configuration and restart the scheduler.
+app.post('/api/config', async (req, res) => {
+  try {
+    const incoming = req.body;
+    const sectionsValid = isPlainObject(incoming)
+      && ['scheduler', 'email'].every((key) => incoming[key] === undefined || isPlainObject(incoming[key]));
+    if (!sectionsValid) {
+      res.status(400).json({ success: false, error: '配置格式错误' });
+      return;
+    }
+
+    // Merge into the saved configuration so that a section or field missing
+    // from the request keeps its stored value instead of being erased.
+    let current = {};
+    try {
+      const saved = await readConfigFile();
+      if (isPlainObject(saved)) current = saved;
+    } catch (error) {
+      // No usable config.json yet: start from an empty configuration.
+      console.warn('读取现有配置失败,将创建新配置:', error.message);
+    }
+
+    const merged = {
+      ...current,
+      ...incoming,
+      scheduler: { ...current.scheduler, ...incoming.scheduler },
+      email: { ...current.email, ...incoming.email },
+    };
+
+    // The browser only ever receives the placeholder, so seeing it again
+    // means "password unchanged".
+    if (merged.email.smtpPass === SMTP_PASSWORD_PLACEHOLDER) {
+      merged.email.smtpPass = current.email?.smtpPass ?? '';
+    }
+
+    const { writeFile } = await import('fs/promises');
+    await writeFile(CONFIG_FILE_URL, JSON.stringify(merged, null, 2), 'utf-8');
+
+    // Pick up the new settings.
+    reloadScheduler();
+
+    res.json({ success: true, message: '配置已保存并重新加载定时任务' });
+  } catch (error) {
+    res.status(500).json({ success: false, error: error.message });
+  }
+});
+
+// Report the scheduler status.
+app.get('/api/scheduler/status', async (req, res) => {
+  try {
+    const status = getSchedulerStatus();
+    res.json({ success: true, data: status });
+  } catch (error) {
+    res.status(500).json({ success: false, error: error.message });
+  }
+});
+
+// Trigger the scheduled task manually (for testing).
+app.post('/api/run-scheduled-task', async (req, res) => {
+  try {
+    console.log('手动触发定时任务...');
+    // Run in the background so the response is not held up.
+    runTaskNow().catch(err => {
+      console.error('定时任务执行失败:', err);
+    });
+    res.json({
+      success: true,
+      message: '定时任务已触发,正在后台执行...'
+    });
+  } catch (error) {
+    res.status(500).json({
+      success: false,
+      error: error.message
+    });
+  }
+});
+
 app.listen(PORT, () => {
   console.log(`Server running at http://localhost:${PORT}`);
+
+  // Start the scheduled task.
+  console.log('正在初始化定时任务...');
+  initScheduler();
 });