refactor(server): 将 Firecrawl 抓取流程迁移为本地 agent 任务
``` - 新增 src/agentService.js:封装 agent 的 createTask/checkTask 接口(含轮询、超时与网络重试) - scheduler.js:scrapers 配置改为 tasks,定时执行改为调用 runAgentTask - server.js:/api/scrapers 路由改为 /api/tasks,任务改为后台异步执行(串行锁),新增 /api/tasks/status 进度查询 ```
This commit is contained in:
137
src/agentService.js
Normal file
137
src/agentService.js
Normal file
@@ -0,0 +1,137 @@
|
||||
/**
|
||||
* Agent API 服务封装
|
||||
* 调用本地部署的 agent 进行公告抓取
|
||||
*/
|
||||
|
||||
// Base URL of the locally deployed agent service (overridable via options.baseUrl).
const DEFAULT_BASE_URL = 'http://192.168.3.65:18625';
const DEFAULT_POLL_INTERVAL = 3000; // poll checkTask every 3 seconds
const DEFAULT_TIMEOUT = 3600000; // overall task timeout: 1 hour
const FETCH_TIMEOUT = 30000; // per-request fetch timeout: 30 seconds
const MAX_FETCH_RETRIES = 5; // retry network errors up to 5 times
|
||||
|
||||
/**
 * Build a unique-enough task id: "task-<epoch ms>-<5 base36 chars>".
 * @returns {string} generated task id
 */
function generateTaskId() {
  const timestamp = Date.now();
  const suffix = Math.random().toString(36).slice(2, 7);
  return `task-${timestamp}-${suffix}`;
}
|
||||
|
||||
/**
 * Promise-based delay.
 * @param {number} ms - milliseconds to wait
 * @returns {Promise<void>} resolves after the delay
 */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
||||
|
||||
/**
 * fetch with a per-attempt timeout and retry on network errors.
 *
 * Each attempt aborts after FETCH_TIMEOUT ms via AbortController; failed
 * attempts back off linearly (3s, 6s, 9s, ...). Non-2xx responses are NOT
 * retried here — they are returned as-is for the caller to inspect.
 *
 * @param {string} url - request URL
 * @param {object} fetchOptions - options forwarded to fetch()
 * @param {number} [retries=MAX_FETCH_RETRIES] - max attempts (must be >= 1)
 * @param {string} [logPrefix='[Agent]'] - prefix for warning logs
 * @returns {Promise<Response>} the first successful fetch Response
 * @throws the last fetch error once all attempts are exhausted
 */
async function fetchWithRetry(url, fetchOptions, retries = MAX_FETCH_RETRIES, logPrefix = '[Agent]') {
  for (let attempt = 1; attempt <= retries; attempt++) {
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), FETCH_TIMEOUT);
    try {
      return await fetch(url, { ...fetchOptions, signal: controller.signal });
    } catch (err) {
      console.warn(`${logPrefix} fetch 失败 (${attempt}/${retries}): ${err.message}`);
      if (attempt === retries) throw err;
      await sleep(3000 * attempt); // linear backoff between attempts
    } finally {
      // Fix: the original only cleared the timer on success, leaking an armed
      // 30s timer (and keeping the event loop alive) on every failed attempt.
      clearTimeout(timer);
    }
  }
  // Previously `retries <= 0` fell through and resolved to undefined.
  throw new Error(`fetchWithRetry: retries must be >= 1 (got ${retries})`);
}
|
||||
|
||||
/**
 * Register a new task with the agent service.
 *
 * @param {string} prompt - instruction text for the agent
 * @param {object} [options] - { baseUrl, useBrowser, logPrefix }
 * @returns {Promise<{taskId: string}>} the generated task id
 * @throws {Error} when the server responds with a non-2xx status
 */
async function createTask(prompt, options = {}) {
  const baseUrl = options.baseUrl || DEFAULT_BASE_URL;
  const useBrowser = options.useBrowser ?? false;
  const logPrefix = options.logPrefix || '[Agent]';
  const taskId = generateTaskId();

  const requestInit = {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ taskId, prompt, useBrowser }),
  };
  const response = await fetchWithRetry(`${baseUrl}/agent/createTask`, requestInit, MAX_FETCH_RETRIES, logPrefix);

  if (!response.ok) {
    throw new Error(`创建任务失败: HTTP ${response.status}`);
  }
  return { taskId };
}
|
||||
|
||||
/**
 * Poll the agent for a task's status.
 *
 * @param {string} taskId - id returned by createTask
 * @param {object} [options] - { baseUrl, logPrefix }
 * @returns {Promise<object|null>} null while the task is still running;
 *   otherwise the parsed completion payload ({ success, message, data }
 *   per the runAgentTask contract — TODO confirm against the agent API)
 * @throws {Error} when the server responds with a non-2xx status
 */
async function checkTask(taskId, options = {}) {
  const baseUrl = options.baseUrl || DEFAULT_BASE_URL;
  const logPrefix = options.logPrefix || '[Agent]';

  const res = await fetchWithRetry(`${baseUrl}/agent/checkTask`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ taskId }),
  }, MAX_FETCH_RETRIES, logPrefix);

  if (!res.ok) {
    throw new Error(`检查任务失败: HTTP ${res.status}`);
  }

  const text = await res.text();
  console.log(`${logPrefix} checkTask(${taskId}) 返回:`, text ? text.substring(0, 500) : '(空)');
  // Empty body or literal "null" means the task has not finished yet.
  if (!text || text.trim() === '' || text.trim() === 'null') {
    return null;
  }

  try {
    return JSON.parse(text);
  } catch (err) {
    // Fix: the original swallowed parse errors silently, making a broken
    // agent response indistinguishable from "still running" until the
    // 1-hour timeout. Still return null (keep polling), but say why.
    console.warn(`${logPrefix} checkTask(${taskId}) 返回非 JSON,继续轮询: ${err.message}`);
    return null;
  }
}
|
||||
|
||||
/**
 * Run an agent task end-to-end: create it, then poll until it completes.
 *
 * @param {string} prompt - instruction text for the agent
 * @param {object} [options] - { baseUrl, useBrowser, pollInterval, timeout, logPrefix }
 * @returns {Promise<{results: Array}>} the task's result list (empty array
 *   when the completion payload carries no results array)
 * @throws {Error} on overall timeout or when the agent reports failure
 */
export async function runAgentTask(prompt, options = {}) {
  // `||` (not `??`) intentionally matches the original default semantics.
  const baseUrl = options.baseUrl || DEFAULT_BASE_URL;
  const useBrowser = options.useBrowser ?? false;
  const pollInterval = options.pollInterval || DEFAULT_POLL_INTERVAL;
  const timeout = options.timeout || DEFAULT_TIMEOUT;
  const logPrefix = options.logPrefix || '[Agent]';

  console.log(`${logPrefix} 创建任务...`);
  const { taskId } = await createTask(prompt, { baseUrl, useBrowser, logPrefix });
  console.log(`${logPrefix} 任务已创建: ${taskId}`);

  const startedAt = Date.now();
  for (;;) {
    if (Date.now() - startedAt > timeout) {
      throw new Error(`任务超时 (${timeout / 1000}秒): ${taskId}`);
    }

    await sleep(pollInterval);
    const status = await checkTask(taskId, { baseUrl, logPrefix });

    // null means the task has not finished yet — keep polling.
    if (status === null) {
      const elapsed = Math.round((Date.now() - startedAt) / 1000);
      console.log(`${logPrefix} 任务进行中... (${elapsed}秒)`);
      continue;
    }

    if (!status.success) {
      throw new Error(`任务失败: ${status.message || '未知错误'}`);
    }

    console.log(`${logPrefix} 任务完成: ${status.message}`);
    const payload = status.data || {};
    const resultList = Array.isArray(payload.results) ? payload.results : [];
    console.log(`${logPrefix} 获取到 ${resultList.length} 条结果`);
    return { results: resultList };
  }
}
|
||||
|
||||
export { generateTaskId, createTask, checkTask };
|
||||
@@ -3,19 +3,14 @@ import cron from 'node-cron';
|
||||
import { readFileSync, writeFileSync, existsSync } from 'fs';
|
||||
import { fileURLToPath } from 'url';
|
||||
import { dirname, join } from 'path';
|
||||
import Firecrawl from '@mendable/firecrawl-js';
|
||||
import { sendScraperResultsEmail } from './emailService.js';
|
||||
import { runScraperWithBrowser } from './firecrawlBrowserScraper.js';
|
||||
import { runAgentTask } from './agentService.js';
|
||||
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
const __dirname = dirname(__filename);
|
||||
|
||||
// 初始化 Firecrawl 客户端
|
||||
const firecrawl = new Firecrawl({ apiKey: process.env.FIRECRAWL_API_KEY });
|
||||
|
||||
const RESULTS_PATH = join(__dirname, '..', 'results.json');
|
||||
|
||||
// 加载配置文件
|
||||
function loadConfig() {
|
||||
try {
|
||||
const configPath = join(__dirname, '..', 'config.json');
|
||||
@@ -26,7 +21,7 @@ function loadConfig() {
|
||||
}
|
||||
}
|
||||
|
||||
// ========== 结果存取(与 server.js 保持一致) ==========
|
||||
// ========== 结果存取 ==========
|
||||
|
||||
function readResults() {
|
||||
if (!existsSync(RESULTS_PATH)) return [];
|
||||
@@ -48,22 +43,24 @@ function appendResult(result) {
|
||||
saveResults(results);
|
||||
}
|
||||
|
||||
// ========== 抓取执行(复用 server.js 中 runScraper 的逻辑) ==========
|
||||
// ========== 任务执行 ==========
|
||||
|
||||
async function runScraper(scraper) {
|
||||
console.log(`[定时任务][Browser] ${scraper.city} - ${scraper.section} ${scraper.subsection} - ${scraper.type}:${scraper.url}`);
|
||||
const { items } = await runScraperWithBrowser(firecrawl, scraper, { logPrefix: '[Browser][Scheduler]' });
|
||||
console.log(`[定时任务][Browser] 提取到 ${items.length} 条公告`);
|
||||
async function runTask(task, agentCfg) {
|
||||
console.log(`[定时任务][Agent] ${task.city}:开始执行`);
|
||||
const { results } = await runAgentTask(task.prompt, {
|
||||
baseUrl: agentCfg.baseUrl,
|
||||
useBrowser: agentCfg.useBrowser,
|
||||
pollInterval: agentCfg.pollInterval,
|
||||
timeout: agentCfg.timeout,
|
||||
logPrefix: `[定时任务][Agent][${task.city}]`,
|
||||
});
|
||||
console.log(`[定时任务][Agent] ${task.city}:获取到 ${results.length} 条结果`);
|
||||
|
||||
const record = {
|
||||
scraperId: scraper.id,
|
||||
city: scraper.city,
|
||||
section: scraper.section,
|
||||
subsection: scraper.subsection,
|
||||
type: scraper.type,
|
||||
url: scraper.url,
|
||||
taskId: task.id,
|
||||
city: task.city,
|
||||
scrapedAt: new Date().toISOString(),
|
||||
data: { result: items, total: items.length },
|
||||
data: { results, total: results.length },
|
||||
};
|
||||
appendResult(record);
|
||||
return record;
|
||||
@@ -78,33 +75,28 @@ async function executeScheduledTask(config) {
|
||||
console.log('执行时间:', new Date().toLocaleString('zh-CN'));
|
||||
console.log('========================================');
|
||||
|
||||
// 获取所有已启用的抓取来源
|
||||
const scrapers = (config.scrapers || []).filter(s => s.enabled);
|
||||
const tasks = (config.tasks || []).filter(t => t.enabled);
|
||||
const agentCfg = config.agent || {};
|
||||
|
||||
if (scrapers.length === 0) {
|
||||
console.log('没有已启用的抓取来源,跳过');
|
||||
if (tasks.length === 0) {
|
||||
console.log('没有已启用的任务,跳过');
|
||||
return;
|
||||
}
|
||||
|
||||
console.log(`共 ${scrapers.length} 个已启用的抓取来源`);
|
||||
console.log(`共 ${tasks.length} 个已启用的任务`);
|
||||
|
||||
// 逐个运行抓取任务
|
||||
const results = [];
|
||||
for (const scraper of scrapers) {
|
||||
for (const task of tasks) {
|
||||
try {
|
||||
console.log(`\n---------- 抓取: ${scraper.city} - ${scraper.section} ${scraper.type} ----------`);
|
||||
const r = await runScraper(scraper);
|
||||
console.log(`\n---------- 任务: ${task.city} ----------`);
|
||||
const r = await runTask(task, agentCfg);
|
||||
results.push(r);
|
||||
console.log(`✓ 抓取成功`);
|
||||
console.log(`✓ 执行成功`);
|
||||
} catch (err) {
|
||||
console.error(`✗ 抓取失败: ${err.message}`);
|
||||
console.error(`✗ 执行失败: ${err.message}`);
|
||||
const errRecord = {
|
||||
scraperId: scraper.id,
|
||||
city: scraper.city,
|
||||
section: scraper.section,
|
||||
subsection: scraper.subsection,
|
||||
type: scraper.type,
|
||||
url: scraper.url,
|
||||
taskId: task.id,
|
||||
city: task.city,
|
||||
scrapedAt: new Date().toISOString(),
|
||||
error: err.message,
|
||||
data: null,
|
||||
@@ -116,18 +108,16 @@ async function executeScheduledTask(config) {
|
||||
|
||||
const successCount = results.filter(r => !r.error).length;
|
||||
const failCount = results.filter(r => r.error).length;
|
||||
console.log(`\n========== 抓取完成 ==========`);
|
||||
console.log(`成功: ${successCount} 条,失败: ${failCount} 条`);
|
||||
console.log(`\n========== 执行完成 ==========`);
|
||||
console.log(`成功: ${successCount},失败: ${failCount}`);
|
||||
|
||||
// 检查是否需要发送邮件
|
||||
if (successCount === 0) {
|
||||
console.log('没有成功的抓取结果,不发送邮件');
|
||||
console.log('没有成功的结果,不发送邮件');
|
||||
return;
|
||||
}
|
||||
|
||||
// 发送邮件报告
|
||||
if (config.email?.smtpHost && config.email?.smtpUser) {
|
||||
console.log('\n正在发送抓取结果邮件...');
|
||||
console.log('\n正在发送结果邮件...');
|
||||
try {
|
||||
const emailResult = await sendScraperResultsEmail(config.email, results);
|
||||
console.log('邮件发送成功! MessageId:', emailResult.messageId);
|
||||
@@ -139,7 +129,6 @@ async function executeScheduledTask(config) {
|
||||
}
|
||||
|
||||
console.log('========================================');
|
||||
|
||||
} catch (error) {
|
||||
console.error('========================================');
|
||||
console.error('定时任务执行失败:', error.message);
|
||||
@@ -157,17 +146,16 @@ export function initScheduler() {
|
||||
if (!config.scheduler?.enabled) { console.log('定时任务已禁用'); return; }
|
||||
|
||||
const cronTime = config.scheduler.cronTime || '0 9 * * *';
|
||||
const enabledCount = (config.scrapers || []).filter(s => s.enabled).length;
|
||||
const enabledCount = (config.tasks || []).filter(t => t.enabled).length;
|
||||
console.log('========================================');
|
||||
console.log('定时任务已启动,执行计划:', cronTime);
|
||||
console.log(`已启用的抓取来源: ${enabledCount} 个`);
|
||||
console.log(`已启用的任务: ${enabledCount} 个`);
|
||||
if (config.email?.recipients) console.log('收件人:', config.email.recipients);
|
||||
console.log('========================================');
|
||||
|
||||
if (currentScheduledTask) { currentScheduledTask.stop(); }
|
||||
|
||||
currentScheduledTask = cron.schedule(cronTime, () => {
|
||||
// 每次执行时重新加载配置,确保使用最新的 scrapers
|
||||
const latestConfig = loadConfig();
|
||||
if (latestConfig) {
|
||||
executeScheduledTask(latestConfig);
|
||||
@@ -191,10 +179,10 @@ export function stopScheduler() {
|
||||
|
||||
export function getSchedulerStatus() {
|
||||
const config = loadConfig();
|
||||
const enabledScrapers = (config?.scrapers || []).filter(s => s.enabled).length;
|
||||
const enabledTasks = (config?.tasks || []).filter(t => t.enabled).length;
|
||||
return {
|
||||
isRunning: currentScheduledTask !== null,
|
||||
enabledScrapers,
|
||||
enabledTasks,
|
||||
config: config ? {
|
||||
enabled: config.scheduler?.enabled || false,
|
||||
cronTime: config.scheduler?.cronTime || '0 9 * * *',
|
||||
|
||||
231
src/server.js
231
src/server.js
@@ -1,13 +1,11 @@
|
||||
import 'dotenv/config';
|
||||
import express from 'express';
|
||||
import cors from 'cors';
|
||||
import Firecrawl from '@mendable/firecrawl-js';
|
||||
import { readFileSync, writeFileSync, existsSync } from 'fs';
|
||||
import { fileURLToPath } from 'url';
|
||||
import { dirname, join } from 'path';
|
||||
import { sendCombinedReportEmail } from './emailService.js';
|
||||
import { initScheduler, runTaskNow, reloadScheduler, getSchedulerStatus } from './scheduler.js';
|
||||
import { runScraperWithBrowser } from './firecrawlBrowserScraper.js';
|
||||
import { runAgentTask } from './agentService.js';
|
||||
|
||||
const app = express();
|
||||
const PORT = process.env.PORT || 5000;
|
||||
@@ -16,8 +14,6 @@ app.use(cors());
|
||||
app.use(express.json());
|
||||
app.use(express.static('public'));
|
||||
|
||||
const firecrawl = new Firecrawl({ apiKey: process.env.FIRECRAWL_API_KEY });
|
||||
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
const __dirname = dirname(__filename);
|
||||
const CONFIG_PATH = join(__dirname, '..', 'config.json');
|
||||
@@ -49,7 +45,6 @@ function saveResults(results) {
|
||||
function appendResult(result) {
|
||||
const results = readResults();
|
||||
results.unshift({ ...result, id: `result-${Date.now()}-${Math.random().toString(36).slice(2, 7)}` });
|
||||
// 最多保留 500 条
|
||||
if (results.length > 500) results.splice(500);
|
||||
saveResults(results);
|
||||
}
|
||||
@@ -57,12 +52,14 @@ function appendResult(result) {
|
||||
// 查询结果(支持分页与筛选)
|
||||
app.get('/api/results', (req, res) => {
|
||||
try {
|
||||
const { city, type, section, page = 1, pageSize = 20, scraperId } = req.query;
|
||||
const { city, type, page = 1, pageSize = 20, taskId } = req.query;
|
||||
let results = readResults();
|
||||
if (city) results = results.filter(r => r.city === city);
|
||||
if (type) results = results.filter(r => r.type === type);
|
||||
if (section) results = results.filter(r => r.section === section);
|
||||
if (scraperId) results = results.filter(r => r.scraperId === scraperId);
|
||||
if (type) results = results.filter(r => {
|
||||
const items = r.data?.results || [];
|
||||
return items.some(item => item.type === type);
|
||||
});
|
||||
if (taskId) results = results.filter(r => r.taskId === taskId);
|
||||
const total = results.length;
|
||||
const start = (parseInt(page) - 1) * parseInt(pageSize);
|
||||
const data = results.slice(start, start + parseInt(pageSize));
|
||||
@@ -87,7 +84,7 @@ app.delete('/api/results/:id', (req, res) => {
|
||||
});
|
||||
|
||||
// 清空所有结果
|
||||
app.delete('/api/results', (req, res) => {
|
||||
app.delete('/api/results', (_req, res) => {
|
||||
try {
|
||||
saveResults([]);
|
||||
res.json({ success: true, message: '已清空所有结果' });
|
||||
@@ -96,46 +93,45 @@ app.delete('/api/results', (req, res) => {
|
||||
}
|
||||
});
|
||||
|
||||
// 获取结果的筛选选项(城市/板块/类型下拉枚举)
|
||||
// 获取结果的筛选选项
|
||||
app.get('/api/results/filters', (req, res) => {
|
||||
try {
|
||||
const results = readResults();
|
||||
const cities = [...new Set(results.map(r => r.city).filter(Boolean))];
|
||||
const sections = [...new Set(results.map(r => r.section).filter(Boolean))];
|
||||
const types = [...new Set(results.map(r => r.type).filter(Boolean))];
|
||||
res.json({ success: true, data: { cities, sections, types } });
|
||||
const types = new Set();
|
||||
for (const r of results) {
|
||||
for (const item of (r.data?.results || [])) {
|
||||
if (item.type) types.add(item.type);
|
||||
}
|
||||
}
|
||||
res.json({ success: true, data: { cities, types: [...types] } });
|
||||
} catch (e) {
|
||||
res.status(500).json({ success: false, error: e.message });
|
||||
}
|
||||
});
|
||||
|
||||
// ========== 抓取来源 CRUD ==========
|
||||
// ========== 任务配置 CRUD ==========
|
||||
|
||||
app.get('/api/scrapers', (req, res) => {
|
||||
app.get('/api/tasks', (req, res) => {
|
||||
try {
|
||||
const cfg = readConfig();
|
||||
res.json({ success: true, data: cfg.scrapers || [] });
|
||||
res.json({ success: true, data: cfg.tasks || [] });
|
||||
} catch (e) {
|
||||
res.status(500).json({ success: false, error: e.message });
|
||||
}
|
||||
});
|
||||
|
||||
app.post('/api/scrapers', (req, res) => {
|
||||
app.post('/api/tasks', (req, res) => {
|
||||
try {
|
||||
const cfg = readConfig();
|
||||
if (!cfg.scrapers) cfg.scrapers = [];
|
||||
if (!cfg.tasks) cfg.tasks = [];
|
||||
const item = {
|
||||
id: `scraper-${Date.now()}`,
|
||||
id: `task-${Date.now()}`,
|
||||
city: req.body.city || '',
|
||||
url: req.body.url || '',
|
||||
section: req.body.section || '',
|
||||
subsection: req.body.subsection || '',
|
||||
type: req.body.type || '招标公告',
|
||||
prompt: req.body.prompt || '',
|
||||
enabled: req.body.enabled !== false,
|
||||
model: req.body.model || 'spark-1-mini',
|
||||
};
|
||||
cfg.scrapers.push(item);
|
||||
cfg.tasks.push(item);
|
||||
saveConfig(cfg);
|
||||
res.json({ success: true, data: item });
|
||||
} catch (e) {
|
||||
@@ -143,25 +139,25 @@ app.post('/api/scrapers', (req, res) => {
|
||||
}
|
||||
});
|
||||
|
||||
app.put('/api/scrapers/:id', (req, res) => {
|
||||
app.put('/api/tasks/:id', (req, res) => {
|
||||
try {
|
||||
const cfg = readConfig();
|
||||
const idx = (cfg.scrapers || []).findIndex(s => s.id === req.params.id);
|
||||
const idx = (cfg.tasks || []).findIndex(t => t.id === req.params.id);
|
||||
if (idx === -1) return res.status(404).json({ success: false, error: '未找到该配置' });
|
||||
cfg.scrapers[idx] = { ...cfg.scrapers[idx], ...req.body, id: req.params.id };
|
||||
cfg.tasks[idx] = { ...cfg.tasks[idx], ...req.body, id: req.params.id };
|
||||
saveConfig(cfg);
|
||||
res.json({ success: true, data: cfg.scrapers[idx] });
|
||||
res.json({ success: true, data: cfg.tasks[idx] });
|
||||
} catch (e) {
|
||||
res.status(500).json({ success: false, error: e.message });
|
||||
}
|
||||
});
|
||||
|
||||
app.delete('/api/scrapers/:id', (req, res) => {
|
||||
app.delete('/api/tasks/:id', (req, res) => {
|
||||
try {
|
||||
const cfg = readConfig();
|
||||
const before = (cfg.scrapers || []).length;
|
||||
cfg.scrapers = (cfg.scrapers || []).filter(s => s.id !== req.params.id);
|
||||
if (cfg.scrapers.length === before) return res.status(404).json({ success: false, error: '未找到' });
|
||||
const before = (cfg.tasks || []).length;
|
||||
cfg.tasks = (cfg.tasks || []).filter(t => t.id !== req.params.id);
|
||||
if (cfg.tasks.length === before) return res.status(404).json({ success: false, error: '未找到' });
|
||||
saveConfig(cfg);
|
||||
res.json({ success: true });
|
||||
} catch (e) {
|
||||
@@ -169,85 +165,120 @@ app.delete('/api/scrapers/:id', (req, res) => {
|
||||
}
|
||||
});
|
||||
|
||||
// ========== 统一抓取执行 ==========
|
||||
// ========== 任务执行(异步 + 串行锁) ==========
|
||||
|
||||
// 执行单个抓取来源并保存结果
|
||||
async function runScraper(scraper) {
|
||||
console.log(`[Browser] ${scraper.city} - ${scraper.section} ${scraper.subsection} - ${scraper.type}:${scraper.url}`);
|
||||
const { items } = await runScraperWithBrowser(firecrawl, scraper, { logPrefix: '[Browser][API]' });
|
||||
console.log(`[Browser] 提取到 ${items.length} 条公告`);
|
||||
let isRunning = false;
|
||||
let runningStatus = null; // { taskId, city, startTime, current, total, finished, error }
|
||||
|
||||
async function runTask(task) {
|
||||
const cfg = readConfig();
|
||||
const agentCfg = cfg.agent || {};
|
||||
|
||||
console.log(`[Agent] ${task.city}:开始执行`);
|
||||
const { results } = await runAgentTask(task.prompt, {
|
||||
baseUrl: agentCfg.baseUrl,
|
||||
useBrowser: agentCfg.useBrowser,
|
||||
pollInterval: agentCfg.pollInterval,
|
||||
timeout: agentCfg.timeout,
|
||||
logPrefix: `[Agent][${task.city}]`,
|
||||
});
|
||||
|
||||
const record = {
|
||||
scraperId: scraper.id,
|
||||
city: scraper.city,
|
||||
section: scraper.section,
|
||||
subsection: scraper.subsection,
|
||||
type: scraper.type,
|
||||
url: scraper.url,
|
||||
taskId: task.id,
|
||||
city: task.city,
|
||||
scrapedAt: new Date().toISOString(),
|
||||
data: { result: items, total: items.length }, // 统一为 result 字段
|
||||
data: { results, total: results.length },
|
||||
};
|
||||
appendResult(record);
|
||||
return record;
|
||||
}
|
||||
|
||||
// 运行指定 ID 的抓取来源(单条测试)
|
||||
app.post('/api/scrapers/:id/run', async (req, res) => {
|
||||
try {
|
||||
const cfg = readConfig();
|
||||
const scraper = (cfg.scrapers || []).find(s => s.id === req.params.id);
|
||||
if (!scraper) return res.status(404).json({ success: false, error: '未找到该配置' });
|
||||
const result = await runScraper(scraper);
|
||||
res.json({ success: true, data: result });
|
||||
} catch (e) {
|
||||
console.error('测试抓取失败:', e.message);
|
||||
res.status(500).json({ success: false, error: e.message });
|
||||
}
|
||||
// Run a single task in the background (fire-and-forget).
// Progress is tracked in the module-level `runningStatus` object and the
// `isRunning` serial-execution lock is released when the task settles.
// NOTE(review): caller is expected to have set `isRunning = true` first —
// confirm against the /api/tasks/:id/run handler.
function runTaskInBackground(task) {
  runningStatus = { taskId: task.id, city: task.city, startTime: Date.now(), current: 0, total: 1, finished: false, error: null };
  runTask(task).then(record => {
    runningStatus = { ...runningStatus, finished: true, result: record, current: 1 };
  }).catch(err => {
    console.error('任务执行失败:', err.message);
    runningStatus = { ...runningStatus, finished: true, error: err.message, current: 1 };
  }).finally(() => {
    // Release the lock regardless of outcome so the next run can start.
    isRunning = false;
  });
}
|
||||
|
||||
// Run a batch of tasks sequentially in the background (fire-and-forget).
// Per-task failures are recorded (via appendResult) and collected in
// runningStatus.results without aborting the remaining tasks; the
// `isRunning` lock is released once the whole batch settles.
function runTasksInBackground(tasks) {
  runningStatus = { taskId: null, city: null, startTime: Date.now(), current: 0, total: tasks.length, finished: false, error: null, results: [] };
  (async () => {
    for (const task of tasks) {
      // Surface which task is currently executing for the status endpoint.
      runningStatus = { ...runningStatus, taskId: task.id, city: task.city };
      try {
        const r = await runTask(task);
        runningStatus.results.push(r);
      } catch (err) {
        const errRecord = { taskId: task.id, city: task.city, scrapedAt: new Date().toISOString(), error: err.message, data: null };
        appendResult(errRecord);
        runningStatus.results.push(errRecord);
      }
      runningStatus.current++;
    }
    runningStatus.finished = true;
  })().catch(err => {
    // Safety net for unexpected failures outside the per-task try/catch.
    runningStatus = { ...runningStatus, finished: true, error: err.message };
  }).finally(() => {
    // Release the serial-execution lock regardless of outcome.
    isRunning = false;
  });
}
|
||||
|
||||
// GET /api/tasks/status — report progress of the current (or last) run.
// Returns { isRunning: false } until the first run has been started;
// `results` is only included once the run has finished.
app.get('/api/tasks/status', (_req, res) => {
  if (!runningStatus) return res.json({ success: true, data: { isRunning: false } });
  // Seconds elapsed since the run started.
  const elapsed = Math.round((Date.now() - runningStatus.startTime) / 1000);
  res.json({
    success: true,
    data: {
      isRunning,
      elapsed,
      city: runningStatus.city,
      current: runningStatus.current,
      total: runningStatus.total,
      finished: runningStatus.finished,
      error: runningStatus.error,
      // Batch runs store `results`; single runs store `result` — normalize to an array.
      results: runningStatus.finished ? (runningStatus.results || (runningStatus.result ? [runningStatus.result] : [])) : undefined,
    }
  });
});
|
||||
|
||||
// 批量运行多个抓取来源
|
||||
// body: { ids: ['id1','id2',...] } 不传则运行所有已启用的
|
||||
app.post('/api/scrape/run', async (req, res) => {
|
||||
try {
|
||||
const cfg = readConfig();
|
||||
let scrapers = cfg.scrapers || [];
|
||||
// POST /api/tasks/:id/run — start a single task and return immediately.
// Responds 409 while another run holds the serial-execution lock,
// 404 when the task id is not in the config; progress is then polled
// via GET /api/tasks/status.
app.post('/api/tasks/:id/run', (req, res) => {
  if (isRunning) return res.status(409).json({ success: false, error: '有任务正在运行中,请等待完成后再试' });
  const cfg = readConfig();
  const task = (cfg.tasks || []).find(t => t.id === req.params.id);
  if (!task) return res.status(404).json({ success: false, error: '未找到该配置' });
  // Acquire the lock before scheduling; released in runTaskInBackground's finally.
  isRunning = true;
  runTaskInBackground(task);
  res.json({ success: true, message: `任务「${task.city}」已开始执行` });
});
|
||||
|
||||
if (req.body.ids && req.body.ids.length > 0) {
|
||||
scrapers = scrapers.filter(s => req.body.ids.includes(s.id));
|
||||
} else {
|
||||
scrapers = scrapers.filter(s => s.enabled);
|
||||
}
|
||||
// 批量运行任务(立即返回)
|
||||
app.post('/api/tasks/run', (req, res) => {
|
||||
if (isRunning) return res.status(409).json({ success: false, error: '有任务正在运行中,请等待完成后再试' });
|
||||
const cfg = readConfig();
|
||||
let tasks = cfg.tasks || [];
|
||||
|
||||
if (scrapers.length === 0) {
|
||||
return res.json({ success: true, data: [], message: '没有可运行的抓取来源' });
|
||||
}
|
||||
|
||||
const results = [];
|
||||
for (const scraper of scrapers) {
|
||||
try {
|
||||
const r = await runScraper(scraper);
|
||||
results.push(r);
|
||||
} catch (err) {
|
||||
const errRecord = {
|
||||
scraperId: scraper.id,
|
||||
city: scraper.city,
|
||||
section: scraper.section,
|
||||
subsection: scraper.subsection,
|
||||
type: scraper.type,
|
||||
url: scraper.url,
|
||||
scrapedAt: new Date().toISOString(),
|
||||
error: err.message,
|
||||
data: null,
|
||||
};
|
||||
appendResult(errRecord);
|
||||
results.push(errRecord);
|
||||
}
|
||||
}
|
||||
|
||||
res.json({ success: true, data: results });
|
||||
} catch (e) {
|
||||
res.status(500).json({ success: false, error: e.message });
|
||||
if (req.body.ids && req.body.ids.length > 0) {
|
||||
tasks = tasks.filter(t => req.body.ids.includes(t.id));
|
||||
} else {
|
||||
tasks = tasks.filter(t => t.enabled);
|
||||
}
|
||||
|
||||
if (tasks.length === 0) {
|
||||
return res.json({ success: true, data: [], message: '没有可运行的任务' });
|
||||
}
|
||||
|
||||
isRunning = true;
|
||||
runTasksInBackground(tasks);
|
||||
res.json({ success: true, message: `${tasks.length} 个任务已开始执行` });
|
||||
});
|
||||
|
||||
// ========== 配置管理 ==========
|
||||
|
||||
Reference in New Issue
Block a user