feat(readme): 对部分文本进行格式调整,包括金额数字空格分隔、API 参数说明优化、标题层级对齐等,提升可读性。
```
This commit is contained in:
2025-12-15 10:36:18 +08:00
parent 745faa0ecc
commit b044e918aa
9 changed files with 949 additions and 80 deletions

View File

@@ -3,6 +3,7 @@ import cors from 'cors';
import axios from 'axios';
import * as cheerio from 'cheerio';
import iconv from 'iconv-lite';
import { sendReportEmail } from './emailService.js';
const app = express();
const PORT = 3000;
@@ -33,24 +34,24 @@ function isDateInRange(dateStr, startDate, endDate) {
return true;
}
// 按时间范围抓取多页列表
// 按时间范围采集多页列表
async function fetchListByDateRange(startDate, endDate, maxPages = 23) {
const allItems = [];
let shouldContinue = true;
let pageIndex = 0;
console.log(`开始按时间范围抓取: ${startDate || '不限'}${endDate || '不限'}`);
console.log(`开始按时间范围采集: ${startDate || '不限'}${endDate || '不限'}`);
while (shouldContinue && pageIndex < maxPages) {
const pageUrl = getPageUrl(pageIndex);
console.log(`正在抓取${pageIndex + 1} 页: ${pageUrl}`);
console.log(`正在采集${pageIndex + 1} 页: ${pageUrl}`);
try {
const html = await fetchHtml(pageUrl);
const items = parseList(html);
if (items.length === 0) {
console.log(`${pageIndex + 1} 页没有数据,停止抓取`);
console.log(`${pageIndex + 1} 页没有数据,停止采集`);
break;
}
@@ -70,7 +71,7 @@ async function fetchListByDateRange(startDate, endDate, maxPages = 23) {
}
if (allItemsBeforeRange && startDate) {
console.log(`${pageIndex + 1} 页所有项目都早于起始日期,停止抓取`);
console.log(`${pageIndex + 1} 页所有项目都早于起始日期,停止采集`);
shouldContinue = false;
}
@@ -82,12 +83,12 @@ async function fetchListByDateRange(startDate, endDate, maxPages = 23) {
await new Promise(resolve => setTimeout(resolve, 500));
}
} catch (err) {
console.error(`抓取${pageIndex + 1} 页失败: ${err.message}`);
console.error(`采集${pageIndex + 1} 页失败: ${err.message}`);
break;
}
}
console.log(`总共抓取${pageIndex} 页,找到 ${allItems.length} 条符合条件的公告`);
console.log(`总共采集${pageIndex} 页,找到 ${allItems.length} 条符合条件的公告`);
return allItems;
}
@@ -207,6 +208,10 @@ function parseDetail(html) {
}
function extractBudget(content) {
// 预处理内容:去除数字之间的换行符和空白字符
// 这样可以匹配被换行符分隔的数字,例如 "1\n1\n0\n9\n0\n0" -> "110900"
let cleanedContent = content.replace(/(\d)\s*[\n\r]\s*(?=\d)/g, '$1');
// 直接定义金额匹配模式(从高优先级到低优先级)
const patterns = [
// 优先级1: 带货币符号的万元
@@ -230,7 +235,7 @@ function extractBudget(content) {
// 遍历所有模式,找到优先级最高的匹配
for (const pattern of patterns) {
const match = content.match(pattern.regex);
const match = cleanedContent.match(pattern.regex);
if (match && pattern.priority < bestPriority) {
// 清理数字中的逗号并转换
const numberStr = match[1].replace(/[,]/g, '');
@@ -329,21 +334,21 @@ app.post('/api/report', async (req, res) => {
const { limit = 15, threshold = 50, url } = req.body;
const targetUrl = url && url.trim() !== '' ? url : BASE_URL;
// 按需抓取多页以获取足够的数据
// 按需采集多页以获取足够的数据
const items = [];
let pageIndex = 0;
const maxPagesToFetch = Math.ceil(limit / 10) + 1; // 假设每页约10条多抓一页保险
while (items.length < limit && pageIndex < maxPagesToFetch) {
const pageUrl = getPageUrl(pageIndex, targetUrl);
console.log(`正在抓取${pageIndex + 1} 页: ${pageUrl}`);
console.log(`正在采集${pageIndex + 1} 页: ${pageUrl}`);
try {
const html = await fetchHtml(pageUrl);
const pageItems = parseList(html);
if (pageItems.length === 0) {
console.log(`${pageIndex + 1} 页没有数据,停止抓取`);
console.log(`${pageIndex + 1} 页没有数据,停止采集`);
break;
}
@@ -354,7 +359,7 @@ app.post('/api/report', async (req, res) => {
await new Promise(resolve => setTimeout(resolve, 500));
}
} catch (err) {
console.error(`抓取${pageIndex + 1} 页失败: ${err.message}`);
console.error(`采集${pageIndex + 1} 页失败: ${err.message}`);
break;
}
}
@@ -417,7 +422,7 @@ app.post('/api/report-daterange', async (req, res) => {
try {
const { startDate, endDate, threshold = 50, maxPages = 23 } = req.body;
// 按时间范围抓取列表
// 按时间范围采集列表
const items = await fetchListByDateRange(startDate, endDate, maxPages);
if (items.length === 0) {
@@ -437,7 +442,7 @@ app.post('/api/report-daterange', async (req, res) => {
});
}
// 抓取详情
// 采集详情
const results = [];
for (const item of items) {
try {
@@ -491,6 +496,50 @@ app.post('/api/report-daterange', async (req, res) => {
}
});
// Send the report by email.
//
// POST /api/send-email
// Request body (JSON): { emailConfig, report }
//   emailConfig.smtpHost, emailConfig.smtpUser, emailConfig.smtpPass - required (must be truthy)
//   emailConfig.recipients - required, must be a non-blank string
//   report - required; handed to sendReportEmail unchanged
// Responses:
//   200 { success: true, message, messageId } - sendReportEmail resolved
//   400 { success: false, error }             - request validation failed
//   500 { success: false, error }             - sendReportEmail (or anything else) threw
app.post('/api/send-email', async (req, res) => {
  try {
    // req.body can be undefined when no body parser handled the request;
    // fall back to an empty object so the validation below answers 400
    // instead of the destructuring throwing and producing a 500.
    const { emailConfig, report } = req.body || {};

    // Validate the required SMTP settings.
    if (!emailConfig || !emailConfig.smtpHost || !emailConfig.smtpUser || !emailConfig.smtpPass) {
      return res.status(400).json({
        success: false,
        error: '邮件配置不完整,请填写SMTP服务器、用户名和密码',
      });
    }

    // Recipients must be a non-blank string. The type check keeps a
    // non-string value (e.g. an array) from throwing on .trim() and being
    // reported as a server error rather than a client error.
    const { recipients } = emailConfig;
    if (typeof recipients !== 'string' || recipients.trim() === '') {
      return res.status(400).json({
        success: false,
        error: '请至少指定一个收件人',
      });
    }

    if (!report) {
      return res.status(400).json({
        success: false,
        error: '没有可发送的报告数据',
      });
    }

    // Send the email; the result's messageId is echoed back to the client.
    const result = await sendReportEmail(emailConfig, report);
    res.json({
      success: true,
      message: '邮件发送成功',
      messageId: result.messageId,
    });
  } catch (error) {
    console.error('发送邮件API错误:', error);
    res.status(500).json({
      success: false,
      error: error.message,
    });
  }
});
// Start the HTTP server on PORT and log the local URL once it is listening.
const announceServerReady = () => {
  console.log(`Server running at http://localhost:${PORT}`);
};
app.listen(PORT, announceServerReady);