feat(readme): 对部分文本进行格式调整,包括金额数字空格分隔、API 参数说明优化、标题层级对齐等,提升可读性。
```
This commit is contained in:
2025-12-15 10:36:18 +08:00
parent 745faa0ecc
commit b044e918aa
9 changed files with 949 additions and 80 deletions

213
src/emailService.js Normal file
View File

@@ -0,0 +1,213 @@
import nodemailer from 'nodemailer';
/**
 * Send the analysis report as an HTML e-mail over SMTP.
 *
 * @param {object} emailConfig - SMTP settings and recipients.
 * @param {string} emailConfig.smtpHost - SMTP server host.
 * @param {number|string} [emailConfig.smtpPort] - SMTP port; numeric strings
 *   are accepted. Falls back to 587 when missing or not a usable number.
 * @param {string} emailConfig.smtpUser - SMTP login, also used as the sender address.
 * @param {string} emailConfig.smtpPass - SMTP password.
 * @param {*} emailConfig.recipients - Passed unchanged as the message's `to` field.
 * @param {object} report - Report data rendered by generateReportHtml.
 * @returns {Promise<{success: boolean, messageId: *}>} `success: true` plus the
 *   `messageId` reported by the transport.
 * @throws {Error} When building or sending the message fails; the original
 *   error is attached as `cause`.
 */
export async function sendReportEmail(emailConfig, report) {
  try {
    // The port frequently arrives as a string (JSON / form input). Coerce it
    // once so the implicit-TLS check below also works for "465".
    const port = Number(emailConfig.smtpPort) || 587;

    // Create the SMTP transport.
    const transporter = nodemailer.createTransport({
      host: emailConfig.smtpHost,
      port,
      secure: port === 465, // true for 465, false for other ports
      auth: {
        user: emailConfig.smtpUser,
        pass: emailConfig.smtpPass,
      },
    });

    // Render the report as an HTML document.
    const htmlContent = generateReportHtml(report);

    // Send the message.
    const info = await transporter.sendMail({
      from: `"公告采集系统" <${emailConfig.smtpUser}>`,
      to: emailConfig.recipients,
      subject: `采购公告分析报告 - ${new Date().toLocaleDateString('zh-CN')}`,
      html: htmlContent,
    });

    return {
      success: true,
      messageId: info.messageId,
    };
  } catch (error) {
    console.error('发送邮件失败:', error);
    // Keep the original error reachable for callers that want the details.
    throw new Error(`邮件发送失败: ${error.message}`, { cause: error });
  }
}
/**
 * Escape a value so it can be interpolated into HTML text or a double-quoted
 * attribute without being interpreted as markup.
 *
 * @param {*} value - Any value; null/undefined become an empty string.
 * @returns {string} The escaped string.
 */
function escapeHtml(value) {
  if (value === null || value === undefined) {
    return '';
  }
  return String(value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

/**
 * Format a budget amount with two decimals; values that are not finite
 * numbers (or numeric strings) are shown escaped instead of throwing.
 *
 * @param {*} value - The raw amount.
 * @returns {string} HTML-safe text for the amount.
 */
function formatBudgetAmount(value) {
  const numeric = typeof value === 'string' && value.trim() !== '' ? Number(value) : value;
  if (typeof numeric === 'number' && Number.isFinite(numeric)) {
    return numeric.toFixed(2);
  }
  return escapeHtml(value);
}

/**
 * Render one project entry of the report.
 *
 * @param {object} project - Project fields (title, date, publish_time, budget, url).
 * @param {number} index - Zero-based position, shown one-based.
 * @returns {string} HTML fragment for the project.
 */
function renderProjectItem(project, index) {
  const item = project || {};
  const budget = item.budget;
  const safeUrl = escapeHtml(item.url);

  const publishTimeHtml = item.publish_time
    ? ` | <strong>发布时间:</strong> ${escapeHtml(item.publish_time)}`
    : '';

  let budgetHtml = '';
  if (budget) {
    // Only mention the original unit when it exists and differs from the shown unit.
    const originalUnitHtml =
      budget.originalUnit && budget.originalUnit !== budget.unit
        ? ` (原始: ${escapeHtml(budget.originalUnit)})`
        : '';
    budgetHtml = `
        <div class="budget">
          预算金额: ${formatBudgetAmount(budget.amount)} ${escapeHtml(budget.unit)}
          ${originalUnitHtml}
        </div>
      `;
  }

  return `
      <div class="project-item">
        <h3>${index + 1}. ${escapeHtml(item.title)}</h3>
        <div class="project-meta">
          <strong>发布日期:</strong> ${escapeHtml(item.date)}
          ${publishTimeHtml}
        </div>
        ${budgetHtml}
        <div style="margin-top: 10px;">
          <a href="${safeUrl}" class="project-link" target="_blank">${safeUrl}</a>
        </div>
      </div>
    `;
}

/**
 * Build the HTML document for the report e-mail.
 *
 * All report values are HTML-escaped before interpolation because the report
 * content originates outside this module (request payload / collected pages).
 *
 * @param {object} report - Report data.
 * @param {object} [report.summary] - total_count, filtered_count, threshold,
 *   total_amount, optional date_range ({startDate, endDate}) and generated_at.
 * @param {Array<object>} [report.projects] - Projects to list; a missing or
 *   non-array value is treated as an empty list.
 * @returns {string} A complete HTML document.
 */
function generateReportHtml(report) {
  const summary = (report && report.summary) || {};
  const projects = report && Array.isArray(report.projects) ? report.projects : [];

  // Start and end are separated explicitly so the two dates do not run together.
  const dateRangeHtml = summary.date_range
    ? `
      <div style="margin-top: 15px; padding-top: 15px; border-top: 1px solid rgba(255,255,255,0.2);">
        <div class="stat-label">时间范围</div>
        <div style="font-size: 14px; margin-top: 5px;">
          ${escapeHtml(summary.date_range.startDate || '不限')} 至 ${escapeHtml(summary.date_range.endDate || '不限')}
        </div>
      </div>
    `
    : '';

  const emptyNoticeHtml =
    projects.length === 0
      ? '<p style="color: #999; text-align: center; padding: 20px;">暂无符合条件的项目</p>'
      : '';

  const projectsHtml = projects.map(renderProjectItem).join('');

  return `
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>采购公告分析报告</title>
<style>
body {
font-family: 'PingFang SC', 'Microsoft YaHei', Arial, sans-serif;
line-height: 1.6;
color: #333;
max-width: 900px;
margin: 0 auto;
padding: 20px;
background-color: #f5f5f5;
}
.container {
background: white;
border-radius: 8px;
padding: 30px;
box-shadow: 0 2px 10px rgba(0,0,0,0.1);
}
h1 {
color: #667eea;
border-bottom: 3px solid #667eea;
padding-bottom: 10px;
margin-bottom: 20px;
}
.summary {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
padding: 20px;
border-radius: 8px;
margin-bottom: 30px;
}
.summary h2 {
margin-top: 0;
margin-bottom: 15px;
font-size: 18px;
}
.stat-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
gap: 15px;
}
.stat {
background: rgba(255,255,255,0.15);
padding: 12px;
border-radius: 6px;
}
.stat-label {
font-size: 13px;
opacity: 0.9;
margin-bottom: 5px;
}
.stat-value {
font-size: 22px;
font-weight: bold;
}
.project-list {
margin-top: 20px;
}
.project-item {
background: #f9f9f9;
border-left: 4px solid #667eea;
padding: 15px;
margin-bottom: 15px;
border-radius: 4px;
}
.project-item h3 {
color: #333;
margin: 0 0 10px 0;
font-size: 16px;
}
.project-meta {
color: #666;
font-size: 14px;
margin: 5px 0;
}
.budget {
display: inline-block;
background: #667eea;
color: white;
padding: 4px 12px;
border-radius: 4px;
font-weight: bold;
margin-top: 8px;
font-size: 14px;
}
.project-link {
color: #667eea;
text-decoration: none;
font-size: 13px;
word-break: break-all;
}
.footer {
margin-top: 30px;
padding-top: 20px;
border-top: 1px solid #e0e0e0;
color: #999;
font-size: 12px;
text-align: center;
}
</style>
</head>
<body>
<div class="container">
  <h1>南京公共工程建设中心 - 采购公告分析报告</h1>
  <div class="summary">
    <h2>报告摘要</h2>
    <div class="stat-grid">
      <div class="stat">
        <div class="stat-label">总公告数量</div>
        <div class="stat-value">${escapeHtml(summary.total_count)} 条</div>
      </div>
      <div class="stat">
        <div class="stat-label">符合条件</div>
        <div class="stat-value">${escapeHtml(summary.filtered_count)} 条</div>
      </div>
      <div class="stat">
        <div class="stat-label">金额阈值</div>
        <div class="stat-value">${escapeHtml(summary.threshold)}</div>
      </div>
      <div class="stat">
        <div class="stat-label">总金额</div>
        <div class="stat-value">${escapeHtml(summary.total_amount)}</div>
      </div>
    </div>
    ${dateRangeHtml}
  </div>
  <h2>项目详情</h2>
  <div class="project-list">
    ${emptyNoticeHtml}
    ${projectsHtml}
  </div>
  <div class="footer">
    <p>报告生成时间: ${new Date(summary.generated_at).toLocaleString('zh-CN')}</p>
    <p>本报告由公告采集系统自动生成</p>
  </div>
</div>
</body>
</html>
`;
}

View File

@@ -3,6 +3,7 @@ import cors from 'cors';
import axios from 'axios';
import * as cheerio from 'cheerio';
import iconv from 'iconv-lite';
import { sendReportEmail } from './emailService.js';
const app = express();
const PORT = 3000;
@@ -33,24 +34,24 @@ function isDateInRange(dateStr, startDate, endDate) {
return true;
}
// 按时间范围抓取多页列表
// 按时间范围采集多页列表
async function fetchListByDateRange(startDate, endDate, maxPages = 23) {
const allItems = [];
let shouldContinue = true;
let pageIndex = 0;
console.log(`开始按时间范围抓取: ${startDate || '不限'}${endDate || '不限'}`);
console.log(`开始按时间范围采集: ${startDate || '不限'}${endDate || '不限'}`);
while (shouldContinue && pageIndex < maxPages) {
const pageUrl = getPageUrl(pageIndex);
console.log(`正在抓取${pageIndex + 1} 页: ${pageUrl}`);
console.log(`正在采集${pageIndex + 1} 页: ${pageUrl}`);
try {
const html = await fetchHtml(pageUrl);
const items = parseList(html);
if (items.length === 0) {
console.log(`${pageIndex + 1} 页没有数据,停止抓取`);
console.log(`${pageIndex + 1} 页没有数据,停止采集`);
break;
}
@@ -70,7 +71,7 @@ async function fetchListByDateRange(startDate, endDate, maxPages = 23) {
}
if (allItemsBeforeRange && startDate) {
console.log(`${pageIndex + 1} 页所有项目都早于起始日期,停止抓取`);
console.log(`${pageIndex + 1} 页所有项目都早于起始日期,停止采集`);
shouldContinue = false;
}
@@ -82,12 +83,12 @@ async function fetchListByDateRange(startDate, endDate, maxPages = 23) {
await new Promise(resolve => setTimeout(resolve, 500));
}
} catch (err) {
console.error(`抓取${pageIndex + 1} 页失败: ${err.message}`);
console.error(`采集${pageIndex + 1} 页失败: ${err.message}`);
break;
}
}
console.log(`总共抓取${pageIndex} 页,找到 ${allItems.length} 条符合条件的公告`);
console.log(`总共采集${pageIndex} 页,找到 ${allItems.length} 条符合条件的公告`);
return allItems;
}
@@ -207,6 +208,10 @@ function parseDetail(html) {
}
function extractBudget(content) {
// 预处理内容:去除数字之间的换行符和空白字符
// 这样可以匹配被换行符分隔的数字,例如 "1\n1\n0\n9\n0\n0" -> "110900"
let cleanedContent = content.replace(/(\d)\s*[\n\r]\s*(?=\d)/g, '$1');
// 直接定义金额匹配模式(从高优先级到低优先级)
const patterns = [
// 优先级1: 带货币符号的万元
@@ -230,7 +235,7 @@ function extractBudget(content) {
// 遍历所有模式,找到优先级最高的匹配
for (const pattern of patterns) {
const match = content.match(pattern.regex);
const match = cleanedContent.match(pattern.regex);
if (match && pattern.priority < bestPriority) {
// 清理数字中的逗号并转换
const numberStr = match[1].replace(/[,]/g, '');
@@ -329,21 +334,21 @@ app.post('/api/report', async (req, res) => {
const { limit = 15, threshold = 50, url } = req.body;
const targetUrl = url && url.trim() !== '' ? url : BASE_URL;
// 按需抓取多页以获取足够的数据
// 按需采集多页以获取足够的数据
const items = [];
let pageIndex = 0;
const maxPagesToFetch = Math.ceil(limit / 10) + 1; // 假设每页约10条多抓一页保险
while (items.length < limit && pageIndex < maxPagesToFetch) {
const pageUrl = getPageUrl(pageIndex, targetUrl);
console.log(`正在抓取${pageIndex + 1} 页: ${pageUrl}`);
console.log(`正在采集${pageIndex + 1} 页: ${pageUrl}`);
try {
const html = await fetchHtml(pageUrl);
const pageItems = parseList(html);
if (pageItems.length === 0) {
console.log(`${pageIndex + 1} 页没有数据,停止抓取`);
console.log(`${pageIndex + 1} 页没有数据,停止采集`);
break;
}
@@ -354,7 +359,7 @@ app.post('/api/report', async (req, res) => {
await new Promise(resolve => setTimeout(resolve, 500));
}
} catch (err) {
console.error(`抓取${pageIndex + 1} 页失败: ${err.message}`);
console.error(`采集${pageIndex + 1} 页失败: ${err.message}`);
break;
}
}
@@ -417,7 +422,7 @@ app.post('/api/report-daterange', async (req, res) => {
try {
const { startDate, endDate, threshold = 50, maxPages = 23 } = req.body;
// 按时间范围抓取列表
// 按时间范围采集列表
const items = await fetchListByDateRange(startDate, endDate, maxPages);
if (items.length === 0) {
@@ -437,7 +442,7 @@ app.post('/api/report-daterange', async (req, res) => {
});
}
// 抓取详情
// 采集详情
const results = [];
for (const item of items) {
try {
@@ -491,6 +496,50 @@ app.post('/api/report-daterange', async (req, res) => {
}
});
// Send a report by e-mail.
// Reads { emailConfig, report } from the request body. Responds with 400 when
// the SMTP settings, the recipients or the report are missing, with 500 when
// sending fails, and otherwise with { success, message, messageId }.
app.post('/api/send-email', async (req, res) => {
  try {
    const { emailConfig, report } = req.body || {};

    // Validate the required SMTP settings.
    if (!emailConfig || !emailConfig.smtpHost || !emailConfig.smtpUser || !emailConfig.smtpPass) {
      return res.status(400).json({
        success: false,
        error: '邮件配置不完整,请填写SMTP服务器、用户名和密码',
      });
    }

    // Recipients may be a single string or an array of strings. Anything else
    // (or only blank entries) is rejected here instead of crashing on .trim().
    const { recipients } = emailConfig;
    const isNonBlank = (value) => typeof value === 'string' && value.trim() !== '';
    const hasRecipients = Array.isArray(recipients)
      ? recipients.some(isNonBlank)
      : isNonBlank(recipients);
    if (!hasRecipients) {
      return res.status(400).json({
        success: false,
        error: '请至少指定一个收件人',
      });
    }

    if (!report) {
      return res.status(400).json({
        success: false,
        error: '没有可发送的报告数据',
      });
    }

    // Send the message.
    const result = await sendReportEmail(emailConfig, report);
    res.json({
      success: true,
      message: '邮件发送成功',
      messageId: result.messageId,
    });
  } catch (error) {
    console.error('发送邮件API错误:', error);
    res.status(500).json({
      success: false,
      error: error.message,
    });
  }
});
// Start the HTTP server and log the local URL once it is listening.
app.listen(PORT, function onListening() {
  console.log(`Server running at http://localhost:${PORT}`);
});