Compare commits
1 Commits
agent_new
...
agent_new2
| Author | SHA1 | Date | |
|---|---|---|---|
| a2408fa952 |
2
.env
2
.env
@@ -2,4 +2,4 @@
|
|||||||
PORT=5000
|
PORT=5000
|
||||||
|
|
||||||
# Firecrawl API Key(在 https://www.firecrawl.dev/app/api-keys 获取)
|
# Firecrawl API Key(在 https://www.firecrawl.dev/app/api-keys 获取)
|
||||||
FIRECRAWL_API_KEY=fc-354d1bbd965d482c977796ff534e15ca
|
FIRECRAWL_API_KEY=fc-595dd922780442f8a907202666a522ef
|
||||||
@@ -906,7 +906,7 @@
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<p style="color:#888;font-size:13px;margin:-8px 0 18px;">通过配置 URL 和提示词,使用 Firecrawl Agent
|
<p style="color:#888;font-size:13px;margin:-8px 0 18px;">通过配置 URL 和提示词,使用 Firecrawl Browser Sandbox
|
||||||
抓取任意网页数据。结果会自动保存,可在「抓取结果」页查看历史。</p>
|
抓取任意网页数据。结果会自动保存,可在「抓取结果」页查看历史。</p>
|
||||||
|
|
||||||
<div class="scrapers-table-wrap">
|
<div class="scrapers-table-wrap">
|
||||||
@@ -1057,7 +1057,7 @@
|
|||||||
<td>
|
<td>
|
||||||
<div class="action-btns">
|
<div class="action-btns">
|
||||||
<button class="btn-sm btn-edit" onclick="openScraperModal('${s.id}')" title="编辑">编辑</button>
|
<button class="btn-sm btn-edit" onclick="openScraperModal('${s.id}')" title="编辑">编辑</button>
|
||||||
<button class="btn-sm btn-run" onclick="runScraper('${s.id}')" title="测试运行">测试</button>
|
<button class="btn-sm btn-run" onclick="runScraper('${s.id}', this)" title="测试运行">测试</button>
|
||||||
<button class="btn-sm ${s.enabled ? 'btn-toggle-on' : 'btn-toggle-off'}" onclick="toggleScraper('${s.id}', ${!s.enabled})" title="切换启用状态">${s.enabled ? '禁用' : '启用'}</button>
|
<button class="btn-sm ${s.enabled ? 'btn-toggle-on' : 'btn-toggle-off'}" onclick="toggleScraper('${s.id}', ${!s.enabled})" title="切换启用状态">${s.enabled ? '禁用' : '启用'}</button>
|
||||||
<button class="btn-sm btn-delete" onclick="deleteScraper('${s.id}')" title="删除">删除</button>
|
<button class="btn-sm btn-delete" onclick="deleteScraper('${s.id}')" title="删除">删除</button>
|
||||||
</div>
|
</div>
|
||||||
@@ -1140,12 +1140,19 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async function runScraper(id) {
|
async function runScraper(id, btnEl) {
|
||||||
const item = scrapersList.find(s => s.id === id);
|
const item = scrapersList.find(s => s.id === id);
|
||||||
const resultDiv = document.getElementById('scraperRunResult');
|
const resultDiv = document.getElementById('scraperRunResult');
|
||||||
const contentDiv = document.getElementById('scraperRunResultContent');
|
const contentDiv = document.getElementById('scraperRunResultContent');
|
||||||
resultDiv.style.display = 'block';
|
resultDiv.style.display = 'block';
|
||||||
contentDiv.textContent = `正在测试抓取「${item?.city} - ${item?.type}」,请稍候...`;
|
contentDiv.textContent = `正在测试抓取「${item?.city} - ${item?.type}」,请稍候...`;
|
||||||
|
resultDiv.scrollIntoView({ behavior: 'smooth', block: 'nearest' });
|
||||||
|
|
||||||
|
const originalText = btnEl ? btnEl.textContent : '';
|
||||||
|
if (btnEl) {
|
||||||
|
btnEl.disabled = true;
|
||||||
|
btnEl.textContent = '测试中...';
|
||||||
|
}
|
||||||
try {
|
try {
|
||||||
const res = await fetch(`/api/scrapers/${id}/run`, { method: 'POST' });
|
const res = await fetch(`/api/scrapers/${id}/run`, { method: 'POST' });
|
||||||
const json = await res.json();
|
const json = await res.json();
|
||||||
@@ -1153,6 +1160,11 @@
|
|||||||
contentDiv.textContent = JSON.stringify(json.data, null, 2);
|
contentDiv.textContent = JSON.stringify(json.data, null, 2);
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
contentDiv.textContent = '❌ 测试失败: ' + err.message;
|
contentDiv.textContent = '❌ 测试失败: ' + err.message;
|
||||||
|
} finally {
|
||||||
|
if (btnEl) {
|
||||||
|
btnEl.disabled = false;
|
||||||
|
btnEl.textContent = originalText || '测试';
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1213,4 +1225,4 @@
|
|||||||
</script>
|
</script>
|
||||||
</body>
|
</body>
|
||||||
|
|
||||||
</html>
|
</html>
|
||||||
|
|||||||
460
results.json
460
results.json
@@ -1,459 +1 @@
|
|||||||
[
|
[]
|
||||||
{
|
|
||||||
"scraperId": "scraper-1772762494299",
|
|
||||||
"city": "南京市",
|
|
||||||
"section": "房建市政",
|
|
||||||
"subsection": "工程类、服务类",
|
|
||||||
"type": "招标公告",
|
|
||||||
"url": "https://njggzy.nanjing.gov.cn/njweb/fjsz/buildService1.html",
|
|
||||||
"scrapedAt": "2026-03-09T03:19:39.057Z",
|
|
||||||
"data": {
|
|
||||||
"result": [
|
|
||||||
{
|
|
||||||
"title": "【澄清公告】文化谷东路 (东吉大道-创新大道)、创新大道(研发二路-文化谷东路)一期道路建设项目 施工",
|
|
||||||
"amount": "3180",
|
|
||||||
"date": "2026-03-09",
|
|
||||||
"url": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001002/20260309/d8483e91-9a7b-4425-a860-c5c9b45365f0.html"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"title": "【澄清公告】南京大学仙林校区学生宿舍楼第28-30幢 20KV变电所工程",
|
|
||||||
"amount": "528",
|
|
||||||
"date": "2026-03-09",
|
|
||||||
"url": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001002/20260309/5a14fb16-fbd1-44d0-9f3f-90823f3639dd.html"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"total": 2
|
|
||||||
},
|
|
||||||
"id": "result-1773026379058-wd4gj"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"scraperId": "scraper-1772762354799",
|
|
||||||
"city": "无锡市",
|
|
||||||
"section": "水利工程",
|
|
||||||
"subsection": "",
|
|
||||||
"type": "招标公告",
|
|
||||||
"url": "https://ggzyjy.wuxi.gov.cn/wxsggzyjyzxzl/jyxx/slgc/index.shtml",
|
|
||||||
"scrapedAt": "2026-03-06T06:57:46.881Z",
|
|
||||||
"data": {
|
|
||||||
"result": [
|
|
||||||
{
|
|
||||||
"title": "高新区三级防控系统工程周三房浜闸站工程施工招标公告",
|
|
||||||
"amount": "5,923,797元",
|
|
||||||
"date": "2026-03-05",
|
|
||||||
"url": "http://ggzyjy.wuxi.gov.cn/doc/2026/03/05/4741071.shtml"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"total": 1
|
|
||||||
},
|
|
||||||
"id": "result-1772780266881-odaof"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"scraperId": "scraper-1772762354799",
|
|
||||||
"city": "无锡市",
|
|
||||||
"section": "水利工程",
|
|
||||||
"subsection": "",
|
|
||||||
"type": "招标公告",
|
|
||||||
"url": "https://ggzyjy.wuxi.gov.cn/wxsggzyjyzxzl/jyxx/slgc/index.shtml",
|
|
||||||
"scrapedAt": "2026-03-06T06:42:40.619Z",
|
|
||||||
"data": {
|
|
||||||
"result": [
|
|
||||||
{
|
|
||||||
"title": "高新区三级防控系统工程周三房浜闸站工程施工招标公告",
|
|
||||||
"amount": "5923797元",
|
|
||||||
"date": "2026-03-05",
|
|
||||||
"url": "https://ggzyjy.wuxi.gov.cn/doc/2026/03/05/4741071.shtml"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"total": 1
|
|
||||||
},
|
|
||||||
"id": "result-1772779360620-xr7ue"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"scraperId": "scraper-1772762354799",
|
|
||||||
"city": "无锡市",
|
|
||||||
"section": "水利工程",
|
|
||||||
"subsection": "",
|
|
||||||
"type": "招标公告",
|
|
||||||
"url": "https://ggzyjy.wuxi.gov.cn/wxsggzyjyzxzl/jyxx/slgc/index.shtml",
|
|
||||||
"scrapedAt": "2026-03-06T04:02:43.530Z",
|
|
||||||
"data": {
|
|
||||||
"items": [
|
|
||||||
{
|
|
||||||
"title": "高新区三级防控系统工程周三房浜闸站工程施工招标公告",
|
|
||||||
"amount": "5923797元",
|
|
||||||
"date": "2026-03-05",
|
|
||||||
"url": "https://ggzyjy.wuxi.gov.cn/doc/2026/03/05/4741071.shtml"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"total": 1
|
|
||||||
},
|
|
||||||
"id": "result-1772769763530-3axw2"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"scraperId": "scraper-1772762354799",
|
|
||||||
"city": "无锡市",
|
|
||||||
"section": "水利工程",
|
|
||||||
"subsection": "",
|
|
||||||
"type": "招标公告",
|
|
||||||
"url": "https://ggzyjy.wuxi.gov.cn/wxsggzyjyzxzl/jyxx/slgc/index.shtml",
|
|
||||||
"scrapedAt": "2026-03-06T02:51:39.452Z",
|
|
||||||
"error": "Insufficient credits to perform this request. For more credits, you can upgrade your plan at https://firecrawl.dev/pricing or try changing the request limit to a lower value.",
|
|
||||||
"data": null,
|
|
||||||
"id": "result-1772765499452-ynhn0"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"scraperId": "scraper-1772762494299",
|
|
||||||
"city": "南京市",
|
|
||||||
"section": "房建市政",
|
|
||||||
"subsection": "工程类",
|
|
||||||
"type": "招标公告",
|
|
||||||
"url": "https://njggzy.nanjing.gov.cn/njweb/fjsz/buildService1.html",
|
|
||||||
"scrapedAt": "2026-03-06T02:32:03.818Z",
|
|
||||||
"data": {
|
|
||||||
"success": true,
|
|
||||||
"status": "completed",
|
|
||||||
"data": {
|
|
||||||
"target_date": "2026-03-06",
|
|
||||||
"notice_count": 0,
|
|
||||||
"notices": [],
|
|
||||||
"message": "截至当前时间(2026-03-06 02:19),网站尚未发布今日(2026-03-06)的招标公告。最新公告日期为2026-03-05。",
|
|
||||||
"recent_notices_fallback": [
|
|
||||||
{
|
|
||||||
"title": "麒麟科创园具身智能训练场装修项目",
|
|
||||||
"date": "2026-03-05",
|
|
||||||
"url": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001002/20260305/a20ee94f-b76e-4f88-b8df-2847c2f35ce1.html",
|
|
||||||
"amount": "5660000.00"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"title": "站东13号(MCd080-07-08)地块10kV电力杆线迁改工程",
|
|
||||||
"date": "2026-03-05",
|
|
||||||
"url": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001002/20260305/f0b99840-e8de-4a08-b2ba-3e57a347864c.html",
|
|
||||||
"amount": "9543100.00"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"title": "【澄清公告】螺丝桥大街北延(月安街至应天大街段)道路工程",
|
|
||||||
"date": "2026-03-05",
|
|
||||||
"url": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001002/20260305/1b3da624-fe86-4755-a268-a1967cd9d489.html",
|
|
||||||
"amount": "900万元"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"title": "建邺路150-164号等9个地块城中村改造项目",
|
|
||||||
"date": "2026-03-05",
|
|
||||||
"url": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001002/20260305/6f4fcf2f-d198-4814-acd8-9817ef559a0c.html",
|
|
||||||
"amount": "1,900,000.00"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"title": "【澄清公告】南京市溧水区柘塘街道供水管网及配套设施提升改造工程",
|
|
||||||
"date": "2026-03-05",
|
|
||||||
"url": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001002/20260305/11ec2263-4ed1-4115-bdd1-0a6dcbf1d6c1.html",
|
|
||||||
"amount": "11320.01万元"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"model": "spark-1-mini",
|
|
||||||
"expiresAt": "2026-03-07T02:32:00.316Z",
|
|
||||||
"creditsUsed": 0
|
|
||||||
},
|
|
||||||
"id": "result-1772764323818-mj8km"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"scraperId": "scraper-1772762354799",
|
|
||||||
"city": "无锡市",
|
|
||||||
"section": "水利工程",
|
|
||||||
"subsection": "",
|
|
||||||
"type": "招标公告",
|
|
||||||
"url": "https://ggzyjy.wuxi.gov.cn/wxsggzyjyzxzl/jyxx/slgc/zbgg/index.shtml",
|
|
||||||
"scrapedAt": "2026-03-06T02:19:27.580Z",
|
|
||||||
"data": {
|
|
||||||
"success": true,
|
|
||||||
"status": "completed",
|
|
||||||
"data": [
|
|
||||||
{
|
|
||||||
"title": "高新区三级防控系统工程周三房浜闸站工程施工招标公告",
|
|
||||||
"project_amount": "5,923,797元 (最高投标限价)",
|
|
||||||
"publish_date": "2026-03-05",
|
|
||||||
"detail_url": "http://ggzyjy.wuxi.gov.cn/doc/2026/03/05/4741071.shtml"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"model": "spark-1-mini",
|
|
||||||
"expiresAt": "2026-03-07T02:19:24.631Z",
|
|
||||||
"creditsUsed": 0
|
|
||||||
},
|
|
||||||
"id": "result-1772763567581-ahz62"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"scraperId": "scraper-1772699302521",
|
|
||||||
"city": "无锡市",
|
|
||||||
"section": "",
|
|
||||||
"subsection": "",
|
|
||||||
"type": "招标公告",
|
|
||||||
"url": "https://ggzyjy.wuxi.gov.cn/wxsggzyjyzxzl/jyxx/slgc/index.shtml",
|
|
||||||
"scrapedAt": "2026-03-05T10:05:46.148Z",
|
|
||||||
"data": {
|
|
||||||
"success": true,
|
|
||||||
"status": "completed",
|
|
||||||
"data": {
|
|
||||||
"announcements": [
|
|
||||||
{
|
|
||||||
"title": "高新区三级防控系统工程周三房浜闸站工程施工招标公告",
|
|
||||||
"project_amount": "最高投标限价:5923797元",
|
|
||||||
"publish_date": "2026-03-05",
|
|
||||||
"url": "https://ggzyjy.wuxi.gov.cn/doc/2026/03/05/4741071.shtml"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"model": "spark-1-mini",
|
|
||||||
"expiresAt": "2026-03-06T10:05:45.297Z",
|
|
||||||
"creditsUsed": 180
|
|
||||||
},
|
|
||||||
"id": "result-1772705146148-kn0ko"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"scraperId": "scraper-1772699302521",
|
|
||||||
"city": "无锡市",
|
|
||||||
"section": "水利工程",
|
|
||||||
"subsection": "",
|
|
||||||
"type": "招标公告",
|
|
||||||
"url": "https://ggzyjy.wuxi.gov.cn/wxsggzyjyzxzl/index.shtml",
|
|
||||||
"scrapedAt": "2026-03-05T10:02:01.153Z",
|
|
||||||
"data": {
|
|
||||||
"success": true,
|
|
||||||
"status": "completed",
|
|
||||||
"data": [
|
|
||||||
{
|
|
||||||
"标题": "高新区三级防控系统工程周三房浜闸站工程施工招标公告",
|
|
||||||
"项目金额": "5,923,797元",
|
|
||||||
"发布日期": "2026-03-05",
|
|
||||||
"详情页完整URL": "http://ggzyjy.wuxi.gov.cn/doc/2026/03/05/4741071.shtml"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"model": "spark-1-mini",
|
|
||||||
"expiresAt": "2026-03-06T10:02:00.100Z",
|
|
||||||
"creditsUsed": 769
|
|
||||||
},
|
|
||||||
"id": "result-1772704921153-jx48m"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"scraperId": "scraper-1772699302521",
|
|
||||||
"city": "无锡市",
|
|
||||||
"section": "水利工程",
|
|
||||||
"subsection": "",
|
|
||||||
"type": "招标公告",
|
|
||||||
"url": "https://ggzyjy.wuxi.gov.cn/wxsggzyjyzxzl/",
|
|
||||||
"scrapedAt": "2026-03-05T09:23:03.452Z",
|
|
||||||
"data": {
|
|
||||||
"success": true,
|
|
||||||
"status": "completed",
|
|
||||||
"data": {
|
|
||||||
"announcements": [
|
|
||||||
{
|
|
||||||
"title": "高新区三级防控系统工程周三房浜闸站工程施工招标公告",
|
|
||||||
"amount": "5,923,797元",
|
|
||||||
"publish_date": "2026-03-05",
|
|
||||||
"url": "https://ggzyjy.wuxi.gov.cn/doc/2026/03/05/4741071.shtml"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"title": "[WXHS202603001-X01]惠山区紧密型县域医共体服务能力提标扩能建设项目(惠山区人民医院紧密型医共体资源共享中心建设项目)勘察设计",
|
|
||||||
"amount": "570.00万元",
|
|
||||||
"publish_date": "2026-03-05",
|
|
||||||
"url": "https://ggzyjy.wuxi.gov.cn/doc/2026/03/05/4741246.shtml"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"model": "spark-1-mini",
|
|
||||||
"expiresAt": "2026-03-06T09:23:01.561Z",
|
|
||||||
"creditsUsed": 0
|
|
||||||
},
|
|
||||||
"id": "result-1772702583452-9t3b8"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"scraperId": "scraper-1772699302521",
|
|
||||||
"city": "无锡市",
|
|
||||||
"section": "水利工程",
|
|
||||||
"subsection": "",
|
|
||||||
"type": "招标公告",
|
|
||||||
"url": "https://ggzyjy.wuxi.gov.cn/wxsggzyjyzxzl/",
|
|
||||||
"scrapedAt": "2026-03-05T08:39:45.736Z",
|
|
||||||
"data": {
|
|
||||||
"success": true,
|
|
||||||
"status": "completed",
|
|
||||||
"data": [
|
|
||||||
{
|
|
||||||
"title": "[WXJY202601013-X01]江阴市长泾镇蒲市村区域性综合农事服务中心江阴市",
|
|
||||||
"amount": "874.0万元",
|
|
||||||
"date": "2026-01-30",
|
|
||||||
"url": "https://ggzyjy.wuxi.gov.cn/doc/2026/01/30/4726538.shtml"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"title": "[WXXS202406006-X02]中共锡山区委党校异地新建项目施工总承包",
|
|
||||||
"amount": "10350.0万元",
|
|
||||||
"date": "2026-01-30",
|
|
||||||
"url": "https://ggzyjy.wuxi.gov.cn/doc/2026/01/30/4726721.shtml"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"title": "[WXXQ202601010-X01]无锡交响音乐厅“一厅”及“两中心”品牌商户用房",
|
|
||||||
"amount": "400.0万元",
|
|
||||||
"date": "2026-01-30",
|
|
||||||
"url": "https://ggzyjy.wuxi.gov.cn/doc/2026/01/30/4726619.shtml"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"title": "[WXXQ202601008-X01]生命园三期2号楼、3号楼改造项目工程总承包",
|
|
||||||
"amount": "3650.0万元",
|
|
||||||
"date": "2026-01-30",
|
|
||||||
"url": "https://ggzyjy.wuxi.gov.cn/doc/2026/01/30/4726675.shtml"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"title": "[WXBH202601007-X01]军嶂山显山透绿工程-吴杨路郊野覆绿工程施工",
|
|
||||||
"amount": "440.0万元",
|
|
||||||
"date": "2026-01-30",
|
|
||||||
"url": "https://ggzyjy.wuxi.gov.cn/doc/2026/01/30/4726726.shtml"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"model": "spark-1-mini",
|
|
||||||
"expiresAt": "2026-03-06T08:39:45.265Z",
|
|
||||||
"creditsUsed": 0
|
|
||||||
},
|
|
||||||
"id": "result-1772699985736-b3nca"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"scraperId": "nj-jtsw-zbgg",
|
|
||||||
"city": "南京市",
|
|
||||||
"section": "房建市政",
|
|
||||||
"subsection": "工程类",
|
|
||||||
"type": "招标公告",
|
|
||||||
"url": "https://njggzy.nanjing.gov.cn/njweb/",
|
|
||||||
"scrapedAt": "2026-03-05T08:05:33.097Z",
|
|
||||||
"data": {
|
|
||||||
"success": true,
|
|
||||||
"status": "completed",
|
|
||||||
"data": {
|
|
||||||
"招标公告": [
|
|
||||||
{
|
|
||||||
"标题": "【澄清公告】螺丝桥大街北延(月安街至应天大街段)道路工程 - 施工",
|
|
||||||
"项目金额": "900 万元",
|
|
||||||
"发布日期": "2026-03-05",
|
|
||||||
"详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001002/20260305/1b3da624-fe86-4755-a268-a1967cd9d489.html"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"标题": "建邺路150-164号等9个地块城中村改造项目 - 施工",
|
|
||||||
"项目金额": "190 万元",
|
|
||||||
"发布日期": "2026-03-05",
|
|
||||||
"详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001002/20260305/6f4fcf2f-d198-4814-acd8-9817ef559a0c.html"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"标题": "【澄清公告】南京市溧水区柘塘街道供水管网及配套设施提升改造工程 - 施工",
|
|
||||||
"项目金额": "11320.01 万元",
|
|
||||||
"发布日期": "2026-03-05",
|
|
||||||
"详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001002/20260305/11ec2263-4ed1-4115-bdd1-0a6dcbf1d6c1.html"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"标题": "栖霞区百水芊城春水坊等5个片区排水管网改造工程 - 施工",
|
|
||||||
"项目金额": "435.86 万元",
|
|
||||||
"发布日期": "2026-03-05",
|
|
||||||
"详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001002/20260305/d69e5640-d549-4638-a64a-d1f9df58a903.html"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"标题": "【澄清公告】兰桥八期保障性住房项目 - 新建居住区供配电工程",
|
|
||||||
"项目金额": "6000 万元",
|
|
||||||
"发布日期": "2026-03-04",
|
|
||||||
"详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001002/20260304/33e25a55-42c4-471e-9a3c-f8e792957141.html"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"标题": "青云巷10号危房整治工程 - SG1施工",
|
|
||||||
"项目金额": "375 万元",
|
|
||||||
"发布日期": "2026-03-04",
|
|
||||||
"详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001002/20260304/e821f82c-39d8-479e-9457-b6bf5d101d80.html"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"标题": "百水工业园地块保障房一期项目 - D地块1#楼(公安编号)室内装饰工程",
|
|
||||||
"项目金额": "600 万元",
|
|
||||||
"发布日期": "2026-03-04",
|
|
||||||
"详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001002/20260304/5f8f2183-e26f-4c03-a76a-8b4d61b0011c.html"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"标题": "青云巷10号危房整治工程 - SG1施工",
|
|
||||||
"项目金额": "375 万元",
|
|
||||||
"发布日期": "2026-03-04",
|
|
||||||
"详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001002/20260304/9aa2d916-c0c3-4fb6-afa4-37457f0d2ceb.html"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"标题": "【澄清公告】全国高校区域技术转移转化中心生物药物创新平台 - 施工",
|
|
||||||
"项目金额": "11000 万元",
|
|
||||||
"发布日期": "2026-03-03",
|
|
||||||
"详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001002/20260303/2d1fe57f-fe0e-42f9-a99a-c345683aed3f.html"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"标题": "轻质耐热合金制造基地项目 - 施工",
|
|
||||||
"项目金额": "11000 万元",
|
|
||||||
"发布日期": "2026-03-03",
|
|
||||||
"详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001002/20260303/78b81308-1389-42fc-a8de-23b6b2b40be1.html"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"标题": "【澄清公告】润埠花园二期项目 - 监理",
|
|
||||||
"项目金额": "111.37 万元",
|
|
||||||
"发布日期": "2026-03-05",
|
|
||||||
"详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001001/20260305/acb0010f-dcbc-4ea4-a988-e4dc75670999.html"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"标题": "轻质耐热合金制造基地项目 - 监理",
|
|
||||||
"项目金额": "188 万元",
|
|
||||||
"发布日期": "2026-03-04",
|
|
||||||
"详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001001/20260304/93ee4804-5a5e-4524-92a3-b6c367803bd1.html"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"标题": "【澄清公告】南京江北新区无人机制造共享工厂项目 - 监理",
|
|
||||||
"项目金额": "212.44 万元",
|
|
||||||
"发布日期": "2026-03-04",
|
|
||||||
"详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001001/20260304/e44a1d28-0f43-494e-8daf-2f81252ed06a.html"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"标题": "2026年四项环卫设施大中修项目 - 设计",
|
|
||||||
"项目金额": "25.58 万元",
|
|
||||||
"发布日期": "2026-03-03",
|
|
||||||
"详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001001/20260303/225961f4-08c8-4398-99c9-7777bf0d16b7.html"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"标题": "【澄清公告】南京市溧水区柘塘街道供水管网及配套设施提升改造工程 - 监理",
|
|
||||||
"项目金额": "164.33 万元",
|
|
||||||
"发布日期": "2026-03-03",
|
|
||||||
"详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001001/20260303/a827d48e-8e1f-42c9-bd07-09ce369c20c6.html"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"标题": "江苏银行金融科技中心建设项目 - 勘察",
|
|
||||||
"项目金额": "170 万元",
|
|
||||||
"发布日期": "2026-03-02",
|
|
||||||
"详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001001/20260302/0ead5303-03db-4d95-b8ea-b32070a39dfa.html"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"标题": "【澄清公告】南京高新区溧水园和凤园区改扩建项目 - 精诚电工地块及惠诚工具地块扩建厂房设计",
|
|
||||||
"项目金额": "140.68 万元",
|
|
||||||
"发布日期": "2026-03-02",
|
|
||||||
"详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001001/20260302/d8df73f9-88d0-4f5d-8831-f9857a1a4ebc.html"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"标题": "【澄清公告】NO.新区2025G11房地产开发项目 - 全过程工程咨询服务",
|
|
||||||
"项目金额": "950 万元",
|
|
||||||
"发布日期": "2026-03-02",
|
|
||||||
"详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001001/20260302/348f6add-d17e-406d-9690-b637762175d7.html"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"标题": "江苏省六合高级中学新建食堂体育馆项目 - 渣土运输处置",
|
|
||||||
"项目金额": "242.97917 万元",
|
|
||||||
"发布日期": "2026-02-28",
|
|
||||||
"详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001001/20260228/2099a860-b3c2-411f-8580-72cbb55fef42.html"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"标题": "【澄清公告】药谷产业区药谷大道(华宝路-汤盘公路)建设工程 - 勘察设计",
|
|
||||||
"项目金额": "194 万元",
|
|
||||||
"发布日期": "2026-02-28",
|
|
||||||
"详情页完整URL": "https://njggzy.nanjing.gov.cn/njweb/fjsz/068001/068001001/20260228/ffee9562-374d-43fd-8829-bf51c5b3cb46.html"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"model": "spark-1-mini",
|
|
||||||
"expiresAt": "2026-03-06T08:05:31.995Z",
|
|
||||||
"creditsUsed": 0
|
|
||||||
},
|
|
||||||
"id": "result-1772697933097-7hm4v"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
275
src/firecrawlBrowserScraper.js
Normal file
275
src/firecrawlBrowserScraper.js
Normal file
@@ -0,0 +1,275 @@
|
|||||||
|
// Fallback extraction prompt, used by runScraperWithBrowser when the scraper config has no prompt of its own.
const DEFAULT_SCRAPER_PROMPT = '提取页面上今日的招标公告信息,包括:标题、项目金额(可能为合同预估价/最高投标限价等)、发布日期(YYYY-MM-DD格式)、详情页完整URL';
|
||||||
|
// Prefix printed by the generated browser script right before the JSON payload, so the payload can be located in captured output.
const PAYLOAD_MARKER = '__FC_PAYLOAD__';
|
||||||
|
|
||||||
|
/**
 * Left-pads a value with zeros so its string form is at least two characters.
 *
 * @param {*} value - Value to pad; converted with String() first.
 * @returns {string} The zero-padded string (longer inputs are returned unchanged).
 */
function pad2(value) {
  const text = String(value);
  return text.padStart(2, '0');
}
|
||||||
|
|
||||||
|
/**
 * Joins year, month and day into a `YEAR-MM-DD` string.
 * Month and day are zero-padded to two characters; the year is used as given.
 *
 * @param {string|number} year - Year component.
 * @param {string|number} month - Month component.
 * @param {string|number} day - Day component.
 * @returns {string} The dash-separated date string.
 */
function formatDate(year, month, day) {
  const mm = pad2(month);
  const dd = pad2(day);
  return `${year}-${mm}-${dd}`;
}
|
||||||
|
|
||||||
|
/**
 * Returns the current calendar date in the Asia/Shanghai time zone.
 *
 * The string is assembled from the formatter's individual year/month/day
 * parts instead of relying on the `en-CA` locale happening to render dates as
 * `YYYY-MM-DD`, so the result does not depend on the locale's field order or
 * separator.
 *
 * @returns {string} Today's date in Shanghai as `YYYY-MM-DD`.
 */
function getTodayInShanghai() {
  const parts = new Intl.DateTimeFormat('en-CA', {
    timeZone: 'Asia/Shanghai',
    year: 'numeric',
    month: '2-digit',
    day: '2-digit',
  }).formatToParts(new Date());

  const pick = (type) => parts.find((part) => part.type === type)?.value ?? '';

  return `${pick('year')}-${pick('month')}-${pick('day')}`;
}
|
||||||
|
|
||||||
|
/**
 * Derives the date the prompt asks for, if any.
 *
 * An explicit 20xx date in the prompt (separated by `-`, `/`, `.` or
 * 年/月/日) wins; otherwise a "today" keyword (今天/今日/当日) maps to the
 * current Shanghai date.
 *
 * @param {*} prompt - Prompt text; falsy values are treated as empty.
 * @returns {string|null} The target date as `YYYY-MM-DD`, or null when the
 *   prompt names no date.
 */
function parseTargetDate(prompt) {
  const source = String(prompt || '');
  if (source === '') return null;

  const explicit = /(20\d{2})[-/.年](\d{1,2})[-/.月](\d{1,2})日?/.exec(source);
  if (explicit !== null) {
    const [, year, month, day] = explicit;
    return formatDate(year, month, day);
  }

  const mentionsToday = /(今天|今日|当日)/.test(source);
  return mentionsToday ? getTodayInShanghai() : null;
}
|
||||||
|
|
||||||
|
/**
 * Normalizes a loosely formatted date string to `YYYY-MM-DD`.
 *
 * Two shapes are recognized, in this order:
 *  1. a full 20xx date separated by `-`, `/`, `.` or 年/月/日;
 *  2. a year-less month/day pair, which is assigned the current Shanghai year.
 *
 * Month must be 1-12 and day 1-31; anything else yields '' so callers can
 * fall back to another source instead of receiving an impossible date. The
 * year-less pattern must not sit inside a longer digit run, so fragments such
 * as "26-03" in "2026-03" or "45.67" in "12345.67" are not taken for dates.
 *
 * @param {*} input - Raw date text; falsy values yield ''.
 * @returns {string} The normalized date, or '' when no valid date is found.
 */
function normalizeDate(input) {
  if (!input) return '';
  const text = String(input).trim();
  if (!text) return '';

  // Coarse calendar sanity check; it does not validate days per month.
  const inRange = (month, day) => {
    const m = Number(month);
    const d = Number(day);
    return m >= 1 && m <= 12 && d >= 1 && d <= 31;
  };

  const full = text.match(/(20\d{2})[-/.年](\d{1,2})[-/.月](\d{1,2})日?/);
  if (full) {
    return inRange(full[2], full[3]) ? formatDate(full[1], full[2], full[3]) : '';
  }

  // Digit boundaries keep the match from starting or ending mid-number.
  const short = text.match(/(?<!\d)(\d{1,2})[-/.月](\d{1,2})(?!\d)日?/);
  if (short && inRange(short[1], short[2])) {
    const currentYear = Number(getTodayInShanghai().slice(0, 4));
    return formatDate(currentYear, short[1], short[2]);
  }

  return '';
}
|
||||||
|
|
||||||
|
/**
 * Finds the first date-looking fragment in free text and normalizes it.
 *
 * @param {*} text - Text to search; falsy values yield ''.
 * @returns {string} Whatever normalizeDate returns for the first matched
 *   fragment, or '' when nothing date-like is found.
 */
function extractDateFromText(text) {
  if (!text) return '';

  const datePattern = /(20\d{2}[-/.年]\d{1,2}[-/.月]\d{1,2}日?)|(\d{1,2}[-/.月]\d{1,2}日?)/;
  const found = datePattern.exec(String(text));
  if (found === null) return '';

  return normalizeDate(found[0]);
}
|
||||||
|
|
||||||
|
/**
 * Extracts the first monetary amount (a number followed by 亿元/万元/万/元)
 * from free text.
 *
 * Whitespace is tolerated only between the number and its unit ("900 万元"),
 * not inside the number. Allowing it inside the number let a preceding
 * numeric token be glued onto the amount: "2026-03-05 5923797元" came out
 * as "055923797元".
 *
 * @param {*} text - Text to search; falsy values yield null.
 * @returns {string|null} The amount with its unit and no whitespace, or null
 *   when the text contains no amount.
 */
function extractAmountFromText(text) {
  if (!text) return null;

  const m = String(text).match(/(\d[\d,.]*)\s*(亿元|万元|万|元)/);
  if (!m) return null;

  return `${m[1]}${m[2]}`;
}
|
||||||
|
|
||||||
|
/**
 * Collapses every run of whitespace to a single space and trims the ends.
 *
 * @param {*} text - Value to clean; falsy values are treated as ''.
 * @returns {string} The cleaned string.
 */
function cleanText(text) {
  const raw = String(text || '');
  const collapsed = raw.replace(/\s+/g, ' ');
  return collapsed.trim();
}
|
||||||
|
|
||||||
|
/**
 * Converts a config value to a finite number, or returns the fallback.
 *
 * Only numbers and non-blank strings are converted. `Number(null)`,
 * `Number('')` and `Number('  ')` all evaluate to 0, so without this guard an
 * unset or empty setting would silently become 0 instead of the fallback.
 *
 * @param {*} value - Candidate value (typically a number or numeric string).
 * @param {*} fallback - Returned when value is missing, blank, of another
 *   type, or not finite.
 * @returns {*} The finite number, or fallback.
 */
function toFiniteNumber(value, fallback) {
  if (typeof value === 'string') {
    if (value.trim() === '') return fallback;
  } else if (typeof value !== 'number') {
    return fallback;
  }

  const n = Number(value);
  return Number.isFinite(n) ? n : fallback;
}
|
||||||
|
|
||||||
|
/**
 * Recovers the JSON payload object from text captured from a browser run.
 *
 * Strategies, tried in order:
 *  1. the first non-blank line after the last PAYLOAD_MARKER occurrence;
 *  2. the whole trimmed text as one JSON document;
 *  3. each non-blank line, scanned from last to first.
 *
 * Only JSON objects and arrays are accepted. A stray line such as `true`,
 * `0` or `null` is valid JSON too; returning it would stop the search and
 * hide a real payload elsewhere in the text, so such values are skipped.
 *
 * @param {*} rawText - Captured text; falsy values yield null.
 * @returns {object|null} The parsed payload, or null when none is found.
 */
function parsePayloadFromText(rawText) {
  if (!rawText) return null;
  const text = String(rawText);

  // Parses one candidate; yields null for invalid JSON and for non-object JSON.
  const tryParseObject = (candidate) => {
    try {
      const parsed = JSON.parse(candidate);
      return parsed !== null && typeof parsed === 'object' ? parsed : null;
    } catch {
      return null;
    }
  };

  const markerIndex = text.lastIndexOf(PAYLOAD_MARKER);
  if (markerIndex >= 0) {
    const tail = text.slice(markerIndex + PAYLOAD_MARKER.length);
    const firstLine = tail.split(/\r?\n/).find((line) => line.trim());
    const fromMarker = firstLine ? tryParseObject(firstLine.trim()) : null;
    if (fromMarker) return fromMarker;
  }

  const whole = tryParseObject(text.trim());
  if (whole) return whole;

  const lines = text
    .split(/\r?\n/)
    .map((line) => line.trim())
    .filter(Boolean)
    .reverse();
  for (const line of lines) {
    const parsed = tryParseObject(line);
    if (parsed) return parsed;
  }

  return null;
}
|
||||||
|
|
||||||
|
/**
 * Picks the payload out of a browser-execute response.
 *
 * The `result` field is tried first, then `stdout`; fields that are not
 * non-blank strings are ignored. The first one that parses to an object wins.
 *
 * @param {object} [executeResult] - Response of the browser execute call.
 * @returns {object} The parsed payload, or `{ items: [] }` when neither field
 *   yields one.
 */
function parseBrowserExecutePayload(executeResult) {
  const candidates = [executeResult?.result, executeResult?.stdout];

  for (const candidate of candidates) {
    if (typeof candidate !== 'string' || candidate.trim().length === 0) continue;

    const parsed = parsePayloadFromText(candidate);
    if (parsed && typeof parsed === 'object') return parsed;
  }

  return { items: [] };
}
|
||||||
|
|
||||||
|
/**
 * Splits a keyword string into individual keywords.
 *
 * Separators are 、 / , , | and whitespace; fragments shorter than two
 * characters are discarded.
 *
 * @param {*} input - Keyword text; falsy values yield an empty list.
 * @returns {string[]} Keywords in their original order.
 */
function splitKeywords(input) {
  const keywords = [];
  const pieces = String(input || '').split(/[、/,,|\s]+/);

  for (const piece of pieces) {
    const word = piece.trim();
    if (word.length >= 2) keywords.push(word);
  }

  return keywords;
}
|
||||||
|
|
||||||
|
/**
 * Narrows items to those mentioning one of the type's keywords.
 *
 * The filter is best-effort: with no usable keywords, or when no item
 * matches, the original list is returned unchanged.
 *
 * @param {Array<{title: string, context?: string}>} items - Candidate items.
 * @param {*} type - Keyword text, split with splitKeywords.
 * @returns {Array} The matching items, or `items` itself as described above.
 */
function filterByTypeIfPossible(items, type) {
  const keywords = splitKeywords(type);
  if (keywords.length === 0) return items;

  const mentionsKeyword = (item) => {
    const haystack = `${item.title} ${item.context || ''}`;
    return keywords.some((keyword) => haystack.includes(keyword));
  };

  const matched = items.filter(mentionsKeyword);
  if (matched.length > 0) return matched;
  return items;
}
|
||||||
|
|
||||||
|
/**
 * Turns raw scraped rows into clean, de-duplicated result items.
 *
 * Steps: drop rows without a title or URL; fill date and amount from the row
 * itself or, failing that, from its surrounding context text; keep the first
 * row per title+URL pair; apply the best-effort type filter; keep only rows
 * dated `targetDate` when one is given; cap the result at 100 items.
 *
 * @param {Iterable<object>} rawItems - Rows with optional title, url, date,
 *   amount and context fields.
 * @param {string|null} targetDate - `YYYY-MM-DD` date to keep, or a falsy
 *   value to keep all dates.
 * @param {*} scraperType - Keyword text passed to filterByTypeIfPossible.
 * @returns {Array<{title: string, amount: (string|null), date: string, url: string}>}
 *   The normalized items.
 */
function normalizeItems(rawItems, targetDate, scraperType) {
  const uniqueByKey = new Map();

  for (const entry of rawItems) {
    const title = cleanText(entry?.title);
    const url = cleanText(entry?.url);
    if (title === '' || url === '') continue;

    const context = cleanText(entry?.context);
    const date = normalizeDate(entry?.date) || extractDateFromText(context);
    const amount = cleanText(entry?.amount) || extractAmountFromText(context) || null;

    // First occurrence wins; later duplicates are ignored.
    const dedupKey = `${title}@@${url}`;
    if (uniqueByKey.has(dedupKey)) continue;
    uniqueByKey.set(dedupKey, { title, amount, date, url, context });
  }

  const typed = filterByTypeIfPossible([...uniqueByKey.values()], scraperType);
  const dated = targetDate ? typed.filter((item) => item.date === targetDate) : typed;

  // context is only needed for filtering, so it is dropped from the output.
  return dated
    .slice(0, 100)
    .map((item) => ({ title: item.title, amount: item.amount, date: item.date, url: item.url }));
}
|
||||||
|
|
||||||
|
/**
 * Builds the source of the script sent to the browser session.
 *
 * The generated script navigates to `url`, waits briefly, then walks every
 * `a[href]` on the page. Links whose text is shorter than 6 or longer than
 * 180 characters, or is a known navigation label, are skipped. For each
 * remaining link it records the title, absolute URL, and the first date-like
 * and amount-like fragments found in the nearest enclosing row or container.
 * At most 300 rows are kept. The payload is printed after PAYLOAD_MARKER and
 * also left as the script's final expression.
 *
 * The amount pattern tolerates whitespace only between the number and its
 * unit. Allowing it inside the number glued a preceding token such as a date
 * onto the amount ("03-05 5923797元" became "055923797元").
 *
 * Backslashes in the embedded regexes are doubled because this is a template
 * literal; the generated script sees single backslashes.
 *
 * @param {string} url - Page to open; embedded via JSON.stringify.
 * @returns {string} JavaScript source for the browser session to execute.
 */
function buildBrowserScript(url) {
  return `
const targetUrl = ${JSON.stringify(url)};
await page.goto(targetUrl, { waitUntil: 'domcontentloaded', timeout: 60000 });
await page.waitForTimeout(1500);

const payload = await page.evaluate(() => {
  const normalize = (value) => String(value || '').replace(/\\s+/g, ' ').trim();
  const blockedTitles = new Set(['首页', '尾页', '上一页', '下一页', '更多', '详情', '查看', '返回', '跳转']);

  const links = Array.from(document.querySelectorAll('a[href]'));
  const rows = [];
  const seen = new Set();

  for (const a of links) {
    const href = a.getAttribute('href') || '';
    if (!href || href.startsWith('javascript:') || href.startsWith('#')) continue;

    const title = normalize(a.textContent);
    if (!title || title.length < 6 || title.length > 180) continue;
    if (blockedTitles.has(title)) continue;

    let absoluteUrl = '';
    try {
      absoluteUrl = new URL(href, location.href).href;
    } catch {
      continue;
    }

    const container = a.closest('tr,li,article,section,div,p,dd,dt') || a.parentElement;
    const context = normalize(container ? container.textContent : title);

    const dateMatch = context.match(/(20\\d{2}[-/.年]\\d{1,2}[-/.月]\\d{1,2}日?)|(\\d{1,2}[-/.月]\\d{1,2}日?)/);
    const amountMatch = context.match(/(\\d[\\d,.]*)\\s*(亿元|万元|万|元)/);

    const key = (title + '@@' + absoluteUrl).toLowerCase();
    if (seen.has(key)) continue;
    seen.add(key);

    rows.push({
      title,
      url: absoluteUrl,
      date: dateMatch ? dateMatch[0] : '',
      amount: amountMatch ? amountMatch[1] + amountMatch[2] : null,
      context,
    });
  }

  return {
    pageUrl: location.href,
    items: rows.slice(0, 300),
  };
});

console.log('${PAYLOAD_MARKER}' + JSON.stringify(payload));
JSON.stringify(payload);
`;
}
|
||||||
|
|
||||||
|
/**
 * Runs one scraper through a Firecrawl browser session.
 *
 * Opens a session, executes the generated page script in it, always attempts
 * to delete the session afterwards (a failed delete is only logged), then
 * parses and normalizes the returned rows.
 *
 * @param {object} firecrawl - Client exposing browser(), browserExecute()
 *   and deleteBrowser().
 * @param {object} scraper - Scraper config; reads url, prompt, type,
 *   browserTtl and browserActivityTtl.
 * @param {{logPrefix?: string}} [options] - Optional log prefix
 *   (defaults to '[Browser]').
 * @returns {Promise<{items: Array, targetDate: (string|null), pageUrl: string}>}
 *   Normalized items, the date filter derived from the prompt, and the page
 *   URL reported by the script (or scraper.url when it reports none).
 * @throws {Error} When the URL is missing, the session cannot be created, or
 *   the script execution reports failure.
 */
export async function runScraperWithBrowser(firecrawl, scraper, options = {}) {
  const prefix = options.logPrefix || '[Browser]';
  if (!scraper?.url) throw new Error('抓取 URL 不能为空');

  const targetDate = parseTargetDate(scraper.prompt || DEFAULT_SCRAPER_PROMPT);

  const sessionOptions = {
    ttl: toFiniteNumber(scraper.browserTtl, 180),
    activityTtl: toFiniteNumber(scraper.browserActivityTtl, 90),
  };
  const session = await firecrawl.browser(sessionOptions);
  if (!(session?.success && session.id)) {
    throw new Error(session?.error || '创建 Browser 会话失败');
  }

  // Best-effort cleanup: a failed delete must not mask the execution outcome.
  const closeSession = async () => {
    try {
      await firecrawl.deleteBrowser(session.id);
    } catch (closeError) {
      console.warn(`${prefix} 会话关闭失败: ${closeError.message}`);
    }
  };

  let executeResult;
  try {
    const request = { code: buildBrowserScript(scraper.url), language: 'node' };
    executeResult = await firecrawl.browserExecute(session.id, request);
  } finally {
    await closeSession();
  }

  if (!executeResult?.success) {
    throw new Error(executeResult?.error || executeResult?.stderr || 'Browser 执行失败');
  }

  const payload = parseBrowserExecutePayload(executeResult);
  const rawItems = Array.isArray(payload.items) ? payload.items : [];
  const items = normalizeItems(rawItems, targetDate, scraper.type);

  const dateSuffix = targetDate ? ` targetDate=${targetDate}` : '';
  console.log(`${prefix} URL=${scraper.url} raw=${rawItems.length} normalized=${items.length}${dateSuffix}`);

  const pageUrl = payload.pageUrl || scraper.url;
  return { items, targetDate, pageUrl };
}
|
||||||
@@ -4,8 +4,8 @@ import { readFileSync, writeFileSync, existsSync } from 'fs';
|
|||||||
import { fileURLToPath } from 'url';
|
import { fileURLToPath } from 'url';
|
||||||
import { dirname, join } from 'path';
|
import { dirname, join } from 'path';
|
||||||
import Firecrawl from '@mendable/firecrawl-js';
|
import Firecrawl from '@mendable/firecrawl-js';
|
||||||
import { z } from 'zod';
|
|
||||||
import { sendScraperResultsEmail } from './emailService.js';
|
import { sendScraperResultsEmail } from './emailService.js';
|
||||||
|
import { runScraperWithBrowser } from './firecrawlBrowserScraper.js';
|
||||||
|
|
||||||
const __filename = fileURLToPath(import.meta.url);
|
const __filename = fileURLToPath(import.meta.url);
|
||||||
const __dirname = dirname(__filename);
|
const __dirname = dirname(__filename);
|
||||||
@@ -48,63 +48,12 @@ function appendResult(result) {
|
|||||||
saveResults(results);
|
saveResults(results);
|
||||||
}
|
}
|
||||||
|
|
||||||
// ========== 统一的公告抓取 Schema ==========
|
|
||||||
|
|
||||||
// Shape of one announcement entry extracted from a page.
const announcementItemSchema = z.object({
  title: z.string().describe('公告标题'),
  amount: z.string().nullable().describe('项目金额(合同预估价/最高投标限价等),没有则为null'),
  date: z.string().describe('发布日期,YYYY-MM-DD格式'),
  url: z.string().describe('详情页完整URL,以https://开头'),
});

// Announcement scraping schema: the entries are wrapped in a `result` array.
const announcementSchema = z.object({
  result: z.array(announcementItemSchema).describe('页面上提取到的所有公告条目'),
});
|
|
||||||
|
|
||||||
/**
 * Extracts the array of result entries from a Firecrawl response.
 *
 * Lookup order (first match wins), applied to `raw.data` when it is an object,
 * otherwise to `raw` itself:
 *   1. `root.result` is a real array.
 *   2. `root.result` is an object with index-like keys ("0", "1", ...).
 *   3. `root` itself is an array.
 *   4. `root` is an object with index-like keys at the top level.
 *
 * Only keys made up entirely of decimal digits count as indexes, so keys such
 * as "1abc" are ignored instead of being coerced by `parseInt`.
 *
 * @param {*} raw - Response returned by Firecrawl (may be null/undefined).
 * @returns {Array} The extracted entries, or an empty array when none are found.
 */
function extractItems(raw) {
  if (!raw) return [];

  const root = (raw.data && typeof raw.data === 'object') ? raw.data : raw;

  // A primitive root (e.g. a string) has no entries; without this guard the
  // character indexes of a string would be mistaken for numeric keys.
  if (typeof root !== 'object') return [];

  // Collects the values stored under index-like keys, in ascending index order.
  const collectIndexedValues = (obj) => Object.keys(obj)
    .filter((key) => /^\d+$/.test(key))
    .sort((a, b) => Number(a) - Number(b))
    .map((key) => obj[key]);

  if (Array.isArray(root.result)) return root.result;

  if (root.result && typeof root.result === 'object') {
    const nested = collectIndexedValues(root.result);
    if (nested.length > 0) return nested;
  }

  if (Array.isArray(root)) return root;

  return collectIndexedValues(root);
}
|
|
||||||
|
|
||||||
// ========== 抓取执行(复用 server.js 中 runScraper 的逻辑) ==========
|
// ========== 抓取执行(复用 server.js 中 runScraper 的逻辑) ==========
|
||||||
|
|
||||||
async function runScraper(scraper) {
|
async function runScraper(scraper) {
|
||||||
console.log(`[定时任务] ${scraper.city} - ${scraper.section} ${scraper.subsection} - ${scraper.type}:${scraper.url}`);
|
console.log(`[定时任务][Browser] ${scraper.city} - ${scraper.section} ${scraper.subsection} - ${scraper.type}:${scraper.url}`);
|
||||||
const fullPrompt = `访问这个URL: ${scraper.url}
|
const { items } = await runScraperWithBrowser(firecrawl, scraper, { logPrefix: '[Browser][Scheduler]' });
|
||||||
【目标区域】:${scraper.section || ''} - ${scraper.subsection || ''}
|
console.log(`[定时任务][Browser] 提取到 ${items.length} 条公告`);
|
||||||
【公告类型】:${scraper.type || ''}
|
|
||||||
|
|
||||||
${scraper.prompt || '提取页面上今日的招标公告信息,包括:标题、项目金额(可能为合同预估价/最高投标限价等等)、发布日期(YYYY-MM-DD格式)、详情页完整URL'}
|
|
||||||
|
|
||||||
请严格按照定义的 JSON 格式返回,每条公告包含 title、amount、date、url 四个字段。`;
|
|
||||||
|
|
||||||
const result = await firecrawl.agent({
|
|
||||||
prompt: fullPrompt,
|
|
||||||
schema: announcementSchema,
|
|
||||||
model: scraper.model || 'spark-1-mini',
|
|
||||||
});
|
|
||||||
|
|
||||||
console.log('[定时任务] 原始返回结果:', JSON.stringify(result).slice(0, 500));
|
|
||||||
|
|
||||||
// 标准化结果
|
|
||||||
const rawItems = extractItems(result);
|
|
||||||
const items = rawItems.map(item => ({
|
|
||||||
title: item.title || '',
|
|
||||||
amount: item.amount || null,
|
|
||||||
date: item.date || '',
|
|
||||||
url: item.url || '',
|
|
||||||
}));
|
|
||||||
|
|
||||||
console.log(`[定时任务] 提取到 ${items.length} 条公告`);
|
|
||||||
|
|
||||||
const record = {
|
const record = {
|
||||||
scraperId: scraper.id,
|
scraperId: scraper.id,
|
||||||
|
|||||||
@@ -2,12 +2,12 @@ import 'dotenv/config';
|
|||||||
import express from 'express';
|
import express from 'express';
|
||||||
import cors from 'cors';
|
import cors from 'cors';
|
||||||
import Firecrawl from '@mendable/firecrawl-js';
|
import Firecrawl from '@mendable/firecrawl-js';
|
||||||
import { z } from 'zod';
|
|
||||||
import { readFileSync, writeFileSync, existsSync } from 'fs';
|
import { readFileSync, writeFileSync, existsSync } from 'fs';
|
||||||
import { fileURLToPath } from 'url';
|
import { fileURLToPath } from 'url';
|
||||||
import { dirname, join } from 'path';
|
import { dirname, join } from 'path';
|
||||||
import { sendCombinedReportEmail } from './emailService.js';
|
import { sendCombinedReportEmail } from './emailService.js';
|
||||||
import { initScheduler, runTaskNow, reloadScheduler, getSchedulerStatus } from './scheduler.js';
|
import { initScheduler, runTaskNow, reloadScheduler, getSchedulerStatus } from './scheduler.js';
|
||||||
|
import { runScraperWithBrowser } from './firecrawlBrowserScraper.js';
|
||||||
|
|
||||||
const app = express();
|
const app = express();
|
||||||
const PORT = process.env.PORT || 5000;
|
const PORT = process.env.PORT || 5000;
|
||||||
@@ -171,68 +171,11 @@ app.delete('/api/scrapers/:id', (req, res) => {
|
|||||||
|
|
||||||
// ========== 统一抓取执行 ==========
|
// ========== 统一抓取执行 ==========
|
||||||
|
|
||||||
// Shape of one announcement entry extracted from a page.
const announcementItemSchema = z.object({
  title: z.string().describe('公告标题'),
  amount: z.string().nullable().describe('项目金额(合同预估价/最高投标限价等),没有则为null'),
  date: z.string().describe('发布日期,YYYY-MM-DD格式'),
  url: z.string().describe('详情页完整URL,以https://开头'),
});

// Announcement scraping schema: the entries are wrapped in a `result` array.
const announcementSchema = z.object({
  result: z.array(announcementItemSchema).describe('页面上提取到的所有公告条目'),
});
|
|
||||||
|
|
||||||
|
|
||||||
/**
 * Extracts the array of result entries from a Firecrawl agent response.
 *
 * Lookup order (first match wins), applied to `raw.data` when it is an object,
 * otherwise to `raw` itself:
 *   1. `root.result` is a real array (preferred).
 *   2. `root.result` is an object with index-like keys ("0", "1", ...).
 *   3. `root` itself is an array.
 *   4. `root` is an object with index-like keys at the top level (fallback).
 *
 * Only keys made up entirely of decimal digits count as indexes, so keys such
 * as "1abc" are ignored instead of being coerced by `parseInt`.
 *
 * @param {*} raw - Response returned by Firecrawl (may be null/undefined).
 * @returns {Array} The extracted entries, or an empty array when none are found.
 */
function extractItems(raw) {
  if (!raw) return [];

  const root = (raw.data && typeof raw.data === 'object') ? raw.data : raw;

  // A primitive root (e.g. a string) has no entries; without this guard the
  // character indexes of a string would be mistaken for numeric keys.
  if (typeof root !== 'object') return [];

  // Collects the values stored under index-like keys, in ascending index order.
  const collectIndexedValues = (obj) => Object.keys(obj)
    .filter((key) => /^\d+$/.test(key))
    .sort((a, b) => Number(a) - Number(b))
    .map((key) => obj[key]);

  // Highest priority: `result` is a real array.
  if (Array.isArray(root.result)) return root.result;

  // `result` is an object keyed by indexes.
  if (root.result && typeof root.result === 'object') {
    const nested = collectIndexedValues(root.result);
    if (nested.length > 0) return nested;
  }

  // The root itself is the array.
  if (Array.isArray(root)) return root;

  // Fallback: index-like keys at the top level (empty array when there are none).
  return collectIndexedValues(root);
}
|
|
||||||
|
|
||||||
// 执行单个抓取来源并保存结果
|
// 执行单个抓取来源并保存结果
|
||||||
async function runScraper(scraper) {
|
async function runScraper(scraper) {
|
||||||
console.log(`[Agent] ${scraper.city} - ${scraper.section} ${scraper.subsection} - ${scraper.type}:${scraper.url}`);
|
console.log(`[Browser] ${scraper.city} - ${scraper.section} ${scraper.subsection} - ${scraper.type}:${scraper.url}`);
|
||||||
const fullPrompt = `访问这个URL: ${scraper.url}
|
const { items } = await runScraperWithBrowser(firecrawl, scraper, { logPrefix: '[Browser][API]' });
|
||||||
【目标区域】:${scraper.section || ''} - ${scraper.subsection || ''}
|
console.log(`[Browser] 提取到 ${items.length} 条公告`);
|
||||||
【公告类型】:${scraper.type || ''}
|
|
||||||
|
|
||||||
${scraper.prompt || '提取页面上今日的招标公告信息,包括:标题、项目金额(可能为合同预估价/最高投标限价等等)、发布日期(YYYY-MM-DD格式)、详情页完整URL'}
|
|
||||||
|
|
||||||
请严格按照定义的 JSON 格式返回,每条公告包含 title、amount、date、url 四个字段。`;
|
|
||||||
console.log(fullPrompt, 'fullPrompt=======');
|
|
||||||
|
|
||||||
const result = await firecrawl.agent({
|
|
||||||
prompt: fullPrompt,
|
|
||||||
schema: announcementSchema,
|
|
||||||
model: scraper.model || 'spark-1-mini',
|
|
||||||
});
|
|
||||||
|
|
||||||
console.log('[Agent] 原始返回结果:', JSON.stringify(result).slice(0, 500));
|
|
||||||
|
|
||||||
const rawItems = extractItems(result);
|
|
||||||
const items = rawItems.map(item => ({
|
|
||||||
title: item.title || '',
|
|
||||||
amount: item.amount || null,
|
|
||||||
date: item.date || '',
|
|
||||||
url: item.url || '',
|
|
||||||
}));
|
|
||||||
|
|
||||||
console.log(`[Agent] 提取到 ${items.length} 条公告`);
|
|
||||||
|
|
||||||
const record = {
|
const record = {
|
||||||
scraperId: scraper.id,
|
scraperId: scraper.id,
|
||||||
|
|||||||
Reference in New Issue
Block a user