根据提供的code differences信息,我发现没有具体的代码差异内容。由于没有实际的代码变更信息,我将生成一个通用的示例commit message:

```
docs(changelog): 更新版本发布说明

- 添加了最新的功能变更记录
- 修复了已知问题的描述
- 更新了API文档的相关部分
```
This commit is contained in:
2026-03-10 16:16:57 +08:00
parent a2408fa952
commit 4f504447a1
6 changed files with 1180 additions and 2114 deletions

137
src/agentService.js Normal file
View File

@@ -0,0 +1,137 @@
/**
* Agent API 服务封装
* 调用本地部署的 agent 进行公告抓取
*/
const DEFAULT_BASE_URL = 'http://192.168.3.65:18625';
const DEFAULT_POLL_INTERVAL = 3000; // 3秒轮询
const DEFAULT_TIMEOUT = 3600000; // 1小时超时
const FETCH_TIMEOUT = 30000; // 单次 fetch 30秒超时
const MAX_FETCH_RETRIES = 5; // 网络错误最多重试5次
function generateTaskId() {
return `task-${Date.now()}-${Math.random().toString(36).slice(2, 7)}`;
}
function sleep(ms) {
return new Promise(resolve => setTimeout(resolve, ms));
}
/**
* 带超时和重试的 fetch
*/
async function fetchWithRetry(url, fetchOptions, retries = MAX_FETCH_RETRIES, logPrefix = '[Agent]') {
for (let attempt = 1; attempt <= retries; attempt++) {
try {
const controller = new AbortController();
const timer = setTimeout(() => controller.abort(), FETCH_TIMEOUT);
const res = await fetch(url, { ...fetchOptions, signal: controller.signal });
clearTimeout(timer);
return res;
} catch (err) {
const isLast = attempt === retries;
console.warn(`${logPrefix} fetch 失败 (${attempt}/${retries}): ${err.message}`);
if (isLast) throw err;
await sleep(3000 * attempt); // 递增等待
}
}
}
/**
* 创建 agent 任务
*/
async function createTask(prompt, options = {}) {
const baseUrl = options.baseUrl || DEFAULT_BASE_URL;
const useBrowser = options.useBrowser ?? false;
const taskId = generateTaskId();
const logPrefix = options.logPrefix || '[Agent]';
const res = await fetchWithRetry(`${baseUrl}/agent/createTask`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ taskId, prompt, useBrowser }),
}, MAX_FETCH_RETRIES, logPrefix);
if (!res.ok) {
throw new Error(`创建任务失败: HTTP ${res.status}`);
}
return { taskId };
}
/**
* 检查任务状态
* 返回空/null 表示任务还在运行,返回 { success, message, data } 表示完成
*/
async function checkTask(taskId, options = {}) {
const baseUrl = options.baseUrl || DEFAULT_BASE_URL;
const logPrefix = options.logPrefix || '[Agent]';
const res = await fetchWithRetry(`${baseUrl}/agent/checkTask`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ taskId }),
}, MAX_FETCH_RETRIES, logPrefix);
if (!res.ok) {
throw new Error(`检查任务失败: HTTP ${res.status}`);
}
const text = await res.text();
console.log(`${logPrefix} checkTask(${taskId}) 返回:`, text ? text.substring(0, 500) : '(空)');
if (!text || text.trim() === '' || text.trim() === 'null') {
return null; // 任务还在运行
}
try {
return JSON.parse(text);
} catch {
return null;
}
}
/**
* 运行 agent 任务:创建 + 轮询直到完成
* 返回 { results: [{ type, project_name, amount_yuan, date, target_link }] }
*/
export async function runAgentTask(prompt, options = {}) {
const baseUrl = options.baseUrl || DEFAULT_BASE_URL;
const useBrowser = options.useBrowser ?? false;
const pollInterval = options.pollInterval || DEFAULT_POLL_INTERVAL;
const timeout = options.timeout || DEFAULT_TIMEOUT;
const logPrefix = options.logPrefix || '[Agent]';
console.log(`${logPrefix} 创建任务...`);
const { taskId } = await createTask(prompt, { baseUrl, useBrowser, logPrefix });
console.log(`${logPrefix} 任务已创建: ${taskId}`);
const startTime = Date.now();
while (true) {
if (Date.now() - startTime > timeout) {
throw new Error(`任务超时 (${timeout / 1000}秒): ${taskId}`);
}
await sleep(pollInterval);
const result = await checkTask(taskId, { baseUrl, logPrefix });
if (result === null) {
const elapsed = Math.round((Date.now() - startTime) / 1000);
console.log(`${logPrefix} 任务进行中... (${elapsed}秒)`);
continue;
}
if (result.success) {
console.log(`${logPrefix} 任务完成: ${result.message}`);
const data = result.data || {};
const results = Array.isArray(data.results) ? data.results : [];
console.log(`${logPrefix} 获取到 ${results.length} 条结果`);
return { results };
} else {
throw new Error(`任务失败: ${result.message || '未知错误'}`);
}
}
}
export { generateTaskId, createTask, checkTask };

View File

@@ -3,19 +3,14 @@ import cron from 'node-cron';
import { readFileSync, writeFileSync, existsSync } from 'fs';
import { fileURLToPath } from 'url';
import { dirname, join } from 'path';
import Firecrawl from '@mendable/firecrawl-js';
import { sendScraperResultsEmail } from './emailService.js';
import { runScraperWithBrowser } from './firecrawlBrowserScraper.js';
import { runAgentTask } from './agentService.js';
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
// 初始化 Firecrawl 客户端
const firecrawl = new Firecrawl({ apiKey: process.env.FIRECRAWL_API_KEY });
const RESULTS_PATH = join(__dirname, '..', 'results.json');
// 加载配置文件
function loadConfig() {
try {
const configPath = join(__dirname, '..', 'config.json');
@@ -26,7 +21,7 @@ function loadConfig() {
}
}
// ========== 结果存取(与 server.js 保持一致) ==========
// ========== 结果存取 ==========
function readResults() {
if (!existsSync(RESULTS_PATH)) return [];
@@ -48,22 +43,24 @@ function appendResult(result) {
saveResults(results);
}
// ========== 抓取执行(复用 server.js 中 runScraper 的逻辑) ==========
// ========== 任务执行 ==========
async function runScraper(scraper) {
console.log(`[定时任务][Browser] ${scraper.city} - ${scraper.section} ${scraper.subsection} - ${scraper.type}${scraper.url}`);
const { items } = await runScraperWithBrowser(firecrawl, scraper, { logPrefix: '[Browser][Scheduler]' });
console.log(`[定时任务][Browser] 提取到 ${items.length} 条公告`);
async function runTask(task, agentCfg) {
console.log(`[定时任务][Agent] ${task.city}:开始执行`);
const { results } = await runAgentTask(task.prompt, {
baseUrl: agentCfg.baseUrl,
useBrowser: agentCfg.useBrowser,
pollInterval: agentCfg.pollInterval,
timeout: agentCfg.timeout,
logPrefix: `[定时任务][Agent][${task.city}]`,
});
console.log(`[定时任务][Agent] ${task.city}:获取到 ${results.length} 条结果`);
const record = {
scraperId: scraper.id,
city: scraper.city,
section: scraper.section,
subsection: scraper.subsection,
type: scraper.type,
url: scraper.url,
taskId: task.id,
city: task.city,
scrapedAt: new Date().toISOString(),
data: { result: items, total: items.length },
data: { results, total: results.length },
};
appendResult(record);
return record;
@@ -78,33 +75,28 @@ async function executeScheduledTask(config) {
console.log('执行时间:', new Date().toLocaleString('zh-CN'));
console.log('========================================');
// 获取所有已启用的抓取来源
const scrapers = (config.scrapers || []).filter(s => s.enabled);
const tasks = (config.tasks || []).filter(t => t.enabled);
const agentCfg = config.agent || {};
if (scrapers.length === 0) {
console.log('没有已启用的抓取来源,跳过');
if (tasks.length === 0) {
console.log('没有已启用的任务,跳过');
return;
}
console.log(`${scrapers.length} 个已启用的抓取来源`);
console.log(`${tasks.length} 个已启用的任务`);
// 逐个运行抓取任务
const results = [];
for (const scraper of scrapers) {
for (const task of tasks) {
try {
console.log(`\n---------- 抓取: ${scraper.city} - ${scraper.section} ${scraper.type} ----------`);
const r = await runScraper(scraper);
console.log(`\n---------- 任务: ${task.city} ----------`);
const r = await runTask(task, agentCfg);
results.push(r);
console.log(`抓取成功`);
console.log(`执行成功`);
} catch (err) {
console.error(`抓取失败: ${err.message}`);
console.error(`执行失败: ${err.message}`);
const errRecord = {
scraperId: scraper.id,
city: scraper.city,
section: scraper.section,
subsection: scraper.subsection,
type: scraper.type,
url: scraper.url,
taskId: task.id,
city: task.city,
scrapedAt: new Date().toISOString(),
error: err.message,
data: null,
@@ -116,18 +108,16 @@ async function executeScheduledTask(config) {
const successCount = results.filter(r => !r.error).length;
const failCount = results.filter(r => r.error).length;
console.log(`\n========== 抓取完成 ==========`);
console.log(`成功: ${successCount},失败: ${failCount}`);
console.log(`\n========== 执行完成 ==========`);
console.log(`成功: ${successCount},失败: ${failCount}`);
// 检查是否需要发送邮件
if (successCount === 0) {
console.log('没有成功的抓取结果,不发送邮件');
console.log('没有成功的结果,不发送邮件');
return;
}
// 发送邮件报告
if (config.email?.smtpHost && config.email?.smtpUser) {
console.log('\n正在发送抓取结果邮件...');
console.log('\n正在发送结果邮件...');
try {
const emailResult = await sendScraperResultsEmail(config.email, results);
console.log('邮件发送成功! MessageId:', emailResult.messageId);
@@ -139,7 +129,6 @@ async function executeScheduledTask(config) {
}
console.log('========================================');
} catch (error) {
console.error('========================================');
console.error('定时任务执行失败:', error.message);
@@ -157,17 +146,16 @@ export function initScheduler() {
if (!config.scheduler?.enabled) { console.log('定时任务已禁用'); return; }
const cronTime = config.scheduler.cronTime || '0 9 * * *';
const enabledCount = (config.scrapers || []).filter(s => s.enabled).length;
const enabledCount = (config.tasks || []).filter(t => t.enabled).length;
console.log('========================================');
console.log('定时任务已启动,执行计划:', cronTime);
console.log(`已启用的抓取来源: ${enabledCount}`);
console.log(`已启用的任务: ${enabledCount}`);
if (config.email?.recipients) console.log('收件人:', config.email.recipients);
console.log('========================================');
if (currentScheduledTask) { currentScheduledTask.stop(); }
currentScheduledTask = cron.schedule(cronTime, () => {
// 每次执行时重新加载配置,确保使用最新的 scrapers
const latestConfig = loadConfig();
if (latestConfig) {
executeScheduledTask(latestConfig);
@@ -191,10 +179,10 @@ export function stopScheduler() {
export function getSchedulerStatus() {
const config = loadConfig();
const enabledScrapers = (config?.scrapers || []).filter(s => s.enabled).length;
const enabledTasks = (config?.tasks || []).filter(t => t.enabled).length;
return {
isRunning: currentScheduledTask !== null,
enabledScrapers,
enabledTasks,
config: config ? {
enabled: config.scheduler?.enabled || false,
cronTime: config.scheduler?.cronTime || '0 9 * * *',

View File

@@ -1,13 +1,11 @@
import 'dotenv/config';
import express from 'express';
import cors from 'cors';
import Firecrawl from '@mendable/firecrawl-js';
import { readFileSync, writeFileSync, existsSync } from 'fs';
import { fileURLToPath } from 'url';
import { dirname, join } from 'path';
import { sendCombinedReportEmail } from './emailService.js';
import { initScheduler, runTaskNow, reloadScheduler, getSchedulerStatus } from './scheduler.js';
import { runScraperWithBrowser } from './firecrawlBrowserScraper.js';
import { runAgentTask } from './agentService.js';
const app = express();
const PORT = process.env.PORT || 5000;
@@ -16,8 +14,6 @@ app.use(cors());
app.use(express.json());
app.use(express.static('public'));
const firecrawl = new Firecrawl({ apiKey: process.env.FIRECRAWL_API_KEY });
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
const CONFIG_PATH = join(__dirname, '..', 'config.json');
@@ -49,7 +45,6 @@ function saveResults(results) {
function appendResult(result) {
const results = readResults();
results.unshift({ ...result, id: `result-${Date.now()}-${Math.random().toString(36).slice(2, 7)}` });
// 最多保留 500 条
if (results.length > 500) results.splice(500);
saveResults(results);
}
@@ -57,12 +52,14 @@ function appendResult(result) {
// 查询结果(支持分页与筛选)
app.get('/api/results', (req, res) => {
try {
const { city, type, section, page = 1, pageSize = 20, scraperId } = req.query;
const { city, type, page = 1, pageSize = 20, taskId } = req.query;
let results = readResults();
if (city) results = results.filter(r => r.city === city);
if (type) results = results.filter(r => r.type === type);
if (section) results = results.filter(r => r.section === section);
if (scraperId) results = results.filter(r => r.scraperId === scraperId);
if (type) results = results.filter(r => {
const items = r.data?.results || [];
return items.some(item => item.type === type);
});
if (taskId) results = results.filter(r => r.taskId === taskId);
const total = results.length;
const start = (parseInt(page) - 1) * parseInt(pageSize);
const data = results.slice(start, start + parseInt(pageSize));
@@ -87,7 +84,7 @@ app.delete('/api/results/:id', (req, res) => {
});
// 清空所有结果
app.delete('/api/results', (req, res) => {
app.delete('/api/results', (_req, res) => {
try {
saveResults([]);
res.json({ success: true, message: '已清空所有结果' });
@@ -96,46 +93,45 @@ app.delete('/api/results', (req, res) => {
}
});
// 获取结果的筛选选项(城市/板块/类型下拉枚举)
// 获取结果的筛选选项
app.get('/api/results/filters', (req, res) => {
try {
const results = readResults();
const cities = [...new Set(results.map(r => r.city).filter(Boolean))];
const sections = [...new Set(results.map(r => r.section).filter(Boolean))];
const types = [...new Set(results.map(r => r.type).filter(Boolean))];
res.json({ success: true, data: { cities, sections, types } });
const types = new Set();
for (const r of results) {
for (const item of (r.data?.results || [])) {
if (item.type) types.add(item.type);
}
}
res.json({ success: true, data: { cities, types: [...types] } });
} catch (e) {
res.status(500).json({ success: false, error: e.message });
}
});
// ========== 抓取来源 CRUD ==========
// ========== 任务配置 CRUD ==========
app.get('/api/scrapers', (req, res) => {
app.get('/api/tasks', (req, res) => {
try {
const cfg = readConfig();
res.json({ success: true, data: cfg.scrapers || [] });
res.json({ success: true, data: cfg.tasks || [] });
} catch (e) {
res.status(500).json({ success: false, error: e.message });
}
});
app.post('/api/scrapers', (req, res) => {
app.post('/api/tasks', (req, res) => {
try {
const cfg = readConfig();
if (!cfg.scrapers) cfg.scrapers = [];
if (!cfg.tasks) cfg.tasks = [];
const item = {
id: `scraper-${Date.now()}`,
id: `task-${Date.now()}`,
city: req.body.city || '',
url: req.body.url || '',
section: req.body.section || '',
subsection: req.body.subsection || '',
type: req.body.type || '招标公告',
prompt: req.body.prompt || '',
enabled: req.body.enabled !== false,
model: req.body.model || 'spark-1-mini',
};
cfg.scrapers.push(item);
cfg.tasks.push(item);
saveConfig(cfg);
res.json({ success: true, data: item });
} catch (e) {
@@ -143,25 +139,25 @@ app.post('/api/scrapers', (req, res) => {
}
});
app.put('/api/scrapers/:id', (req, res) => {
app.put('/api/tasks/:id', (req, res) => {
try {
const cfg = readConfig();
const idx = (cfg.scrapers || []).findIndex(s => s.id === req.params.id);
const idx = (cfg.tasks || []).findIndex(t => t.id === req.params.id);
if (idx === -1) return res.status(404).json({ success: false, error: '未找到该配置' });
cfg.scrapers[idx] = { ...cfg.scrapers[idx], ...req.body, id: req.params.id };
cfg.tasks[idx] = { ...cfg.tasks[idx], ...req.body, id: req.params.id };
saveConfig(cfg);
res.json({ success: true, data: cfg.scrapers[idx] });
res.json({ success: true, data: cfg.tasks[idx] });
} catch (e) {
res.status(500).json({ success: false, error: e.message });
}
});
app.delete('/api/scrapers/:id', (req, res) => {
app.delete('/api/tasks/:id', (req, res) => {
try {
const cfg = readConfig();
const before = (cfg.scrapers || []).length;
cfg.scrapers = (cfg.scrapers || []).filter(s => s.id !== req.params.id);
if (cfg.scrapers.length === before) return res.status(404).json({ success: false, error: '未找到' });
const before = (cfg.tasks || []).length;
cfg.tasks = (cfg.tasks || []).filter(t => t.id !== req.params.id);
if (cfg.tasks.length === before) return res.status(404).json({ success: false, error: '未找到' });
saveConfig(cfg);
res.json({ success: true });
} catch (e) {
@@ -169,85 +165,120 @@ app.delete('/api/scrapers/:id', (req, res) => {
}
});
// ========== 统一抓取执行 ==========
// ========== 任务执行(异步 + 串行锁) ==========
// 执行单个抓取来源并保存结果
async function runScraper(scraper) {
console.log(`[Browser] ${scraper.city} - ${scraper.section} ${scraper.subsection} - ${scraper.type}${scraper.url}`);
const { items } = await runScraperWithBrowser(firecrawl, scraper, { logPrefix: '[Browser][API]' });
console.log(`[Browser] 提取到 ${items.length} 条公告`);
let isRunning = false;
let runningStatus = null; // { taskId, city, startTime, current, total, finished, error }
async function runTask(task) {
const cfg = readConfig();
const agentCfg = cfg.agent || {};
console.log(`[Agent] ${task.city}:开始执行`);
const { results } = await runAgentTask(task.prompt, {
baseUrl: agentCfg.baseUrl,
useBrowser: agentCfg.useBrowser,
pollInterval: agentCfg.pollInterval,
timeout: agentCfg.timeout,
logPrefix: `[Agent][${task.city}]`,
});
const record = {
scraperId: scraper.id,
city: scraper.city,
section: scraper.section,
subsection: scraper.subsection,
type: scraper.type,
url: scraper.url,
taskId: task.id,
city: task.city,
scrapedAt: new Date().toISOString(),
data: { result: items, total: items.length }, // 统一为 result 字段
data: { results, total: results.length },
};
appendResult(record);
return record;
}
// 运行指定 ID 的抓取来源(单条测试)
app.post('/api/scrapers/:id/run', async (req, res) => {
try {
const cfg = readConfig();
const scraper = (cfg.scrapers || []).find(s => s.id === req.params.id);
if (!scraper) return res.status(404).json({ success: false, error: '未找到该配置' });
const result = await runScraper(scraper);
res.json({ success: true, data: result });
} catch (e) {
console.error('测试抓取失败:', e.message);
res.status(500).json({ success: false, error: e.message });
}
// 后台执行单个任务
function runTaskInBackground(task) {
runningStatus = { taskId: task.id, city: task.city, startTime: Date.now(), current: 0, total: 1, finished: false, error: null };
runTask(task).then(record => {
runningStatus = { ...runningStatus, finished: true, result: record, current: 1 };
}).catch(err => {
console.error('任务执行失败:', err.message);
runningStatus = { ...runningStatus, finished: true, error: err.message, current: 1 };
}).finally(() => {
isRunning = false;
});
}
// 后台执行批量任务
function runTasksInBackground(tasks) {
runningStatus = { taskId: null, city: null, startTime: Date.now(), current: 0, total: tasks.length, finished: false, error: null, results: [] };
(async () => {
for (const task of tasks) {
runningStatus = { ...runningStatus, taskId: task.id, city: task.city };
try {
const r = await runTask(task);
runningStatus.results.push(r);
} catch (err) {
const errRecord = { taskId: task.id, city: task.city, scrapedAt: new Date().toISOString(), error: err.message, data: null };
appendResult(errRecord);
runningStatus.results.push(errRecord);
}
runningStatus.current++;
}
runningStatus.finished = true;
})().catch(err => {
runningStatus = { ...runningStatus, finished: true, error: err.message };
}).finally(() => {
isRunning = false;
});
}
// 查询运行状态
app.get('/api/tasks/status', (_req, res) => {
if (!runningStatus) return res.json({ success: true, data: { isRunning: false } });
const elapsed = Math.round((Date.now() - runningStatus.startTime) / 1000);
res.json({
success: true,
data: {
isRunning,
elapsed,
city: runningStatus.city,
current: runningStatus.current,
total: runningStatus.total,
finished: runningStatus.finished,
error: runningStatus.error,
results: runningStatus.finished ? (runningStatus.results || (runningStatus.result ? [runningStatus.result] : [])) : undefined,
}
});
});
// 批量运行多个抓取来源
// body: { ids: ['id1','id2',...] } 不传则运行所有已启用的
app.post('/api/scrape/run', async (req, res) => {
try {
const cfg = readConfig();
let scrapers = cfg.scrapers || [];
// 运行单个任务(立即返回)
app.post('/api/tasks/:id/run', (req, res) => {
if (isRunning) return res.status(409).json({ success: false, error: '有任务正在运行中,请等待完成后再试' });
const cfg = readConfig();
const task = (cfg.tasks || []).find(t => t.id === req.params.id);
if (!task) return res.status(404).json({ success: false, error: '未找到该配置' });
isRunning = true;
runTaskInBackground(task);
res.json({ success: true, message: `任务「${task.city}」已开始执行` });
});
if (req.body.ids && req.body.ids.length > 0) {
scrapers = scrapers.filter(s => req.body.ids.includes(s.id));
} else {
scrapers = scrapers.filter(s => s.enabled);
}
// 批量运行任务(立即返回)
app.post('/api/tasks/run', (req, res) => {
if (isRunning) return res.status(409).json({ success: false, error: '有任务正在运行中,请等待完成后再试' });
const cfg = readConfig();
let tasks = cfg.tasks || [];
if (scrapers.length === 0) {
return res.json({ success: true, data: [], message: '没有可运行的抓取来源' });
}
const results = [];
for (const scraper of scrapers) {
try {
const r = await runScraper(scraper);
results.push(r);
} catch (err) {
const errRecord = {
scraperId: scraper.id,
city: scraper.city,
section: scraper.section,
subsection: scraper.subsection,
type: scraper.type,
url: scraper.url,
scrapedAt: new Date().toISOString(),
error: err.message,
data: null,
};
appendResult(errRecord);
results.push(errRecord);
}
}
res.json({ success: true, data: results });
} catch (e) {
res.status(500).json({ success: false, error: e.message });
if (req.body.ids && req.body.ids.length > 0) {
tasks = tasks.filter(t => req.body.ids.includes(t.id));
} else {
tasks = tasks.filter(t => t.enabled);
}
if (tasks.length === 0) {
return res.json({ success: true, data: [], message: '没有可运行的任务' });
}
isRunning = true;
runTasksInBackground(tasks);
res.json({ success: true, message: `${tasks.length} 个任务已开始执行` });
});
// ========== 配置管理 ==========