```

feat(llm): 添加AI智能分析配置功能

新增LLM配置模块，支持通过阿里云DashScope API进行招标金额的智能提取。配置包括API Key、Base URL、模型选择等，并提供启用开关。前端界面增加“AI配置”标签页，包含状态展示、配置表单及测试连接功能。后端增强parseDetailEnhanced方法，优先使用LLM提取金额，失败时降级至正则表达式。同时实现LLM状态查询与连接测试接口，确保配置有效性。配置文件中新增llm字段，默认关闭，支持安全存储API密钥。
```
2025-12-15 17:49:11 +08:00
parent f797ed9a61
commit 6fc9748009
6 changed files with 562 additions and 7 deletions
--- a/src/llmService.js
+++ b/src/llmService.js
@@ -0,0 +1,237 @@
+/**
+ * LLM 服务模块 - 使用阿里云通义千问 API 提取招标金额
+ */
+
+import { readFileSync } from 'fs';
+import { join, dirname } from 'path';
+import { fileURLToPath } from 'url';
+
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = dirname(__filename);
+
+// 获取 LLM 配置
+function getLLMConfig() {
+  try {
+    const configPath = join(__dirname, '..', 'config.json');
+    const configContent = readFileSync(configPath, 'utf-8');
+    const config = JSON.parse(configContent);
+    return config.llm || null;
+  } catch (err) {
+    console.error('读取 LLM 配置失败:', err.message);
+    return null;
+  }
+}
+
+// 检查 LLM 是否已启用
+export function isLLMEnabled() {
+  const config = getLLMConfig();
+  return config && config.enabled && config.apiKey;
+}
+
+// 使用 LLM 提取招标金额
+export async function extractBudgetWithLLM(content) {
+  const config = getLLMConfig();
+
+  if (!config || !config.enabled || !config.apiKey) {
+    return null;
+  }
+
+  // 智能提取包含金额信息的段落，避免截断丢失关键信息
+  const maxContentLength = 4000;
+  let truncatedContent = content;
+
+  if (content.length > maxContentLength) {
+    // 查找金额关键词的位置，提取关键词周围的上下文
+    const budgetKeywords = ['预算金额', '项目预算', '采购预算', '控制价', '最高限价', '招标金额', '项目金额', '合同金额', '投标报价', '中标金额', '成交金额', '中标价', '成交价'];
+    const contextRadius = 200; // 关键词前后各取200字符
+    const extractedContexts = [];
+
+    for (const keyword of budgetKeywords) {
+      let pos = content.indexOf(keyword);
+      while (pos !== -1) {
+        const start = Math.max(0, pos - contextRadius);
+        const end = Math.min(content.length, pos + keyword.length + contextRadius);
+        extractedContexts.push(content.substring(start, end));
+        pos = content.indexOf(keyword, pos + 1);
+      }
+    }
+
+    if (extractedContexts.length > 0) {
+      // 有相关内容，拼接：开头部分 + 提取的上下文
+      const headerContent = content.substring(0, 1500);
+      const relevantContent = [...new Set(extractedContexts)].join('\n---\n'); // 去重
+      truncatedContent = headerContent + '\n\n【以下为金额相关内容】\n' + relevantContent;
+
+      if (truncatedContent.length > maxContentLength) {
+        truncatedContent = truncatedContent.substring(0, maxContentLength) + '...(内容已截断)';
+      }
+    } else {
+      // 没找到相关内容，使用原来的截断方式
+      truncatedContent = content.substring(0, maxContentLength) + '...(内容已截断)';
+    }
+  }
+
+  const prompt = `你是一个专业的招标文件分析助手。请从以下招标公告内容中提取预算金额信息。
+
+要求：
+1. 优先查找以下字段对应的金额：预算金额、项目预算、采购预算、预算、控制价、最高限价、招标金额、项目金额、合同金额、投标报价、中标金额、成交金额、中标价、成交价
+2. 如果有多个金额，优先选择"预算金额"或"项目预算"
+3. 金额统一转换为万元单位（如 70万元 = 70，700000元 = 70）
+4. 严格按照 JSON 格式返回，不要添加任何其他文字
+
+常见格式示例：
+- "预算金额：70万元" → amount: 70
+- "预算金额：700000元" → amount: 70
+- "项目预算：70.00万元" → amount: 70
+
+返回格式（必须是合法的 JSON）：
+{"amount": 数值, "unit": "万元", "text": "原文中的金额描述"}
+
+如果没有找到金额，返回：
+{"amount": null, "unit": null, "text": null}
+
+公告内容：
+${truncatedContent}`;
+
+  try {
+    const response = await fetch(`${config.baseUrl}/chat/completions`, {
+      method: 'POST',
+      headers: {
+        'Content-Type': 'application/json',
+        'Authorization': `Bearer ${config.apiKey}`,
+      },
+      body: JSON.stringify({
+        model: config.model || 'qwen-turbo',
+        messages: [
+          {
+            role: 'user',
+            content: prompt,
+          },
+        ],
+        temperature: 0.1, // 低温度，保证输出稳定
+        max_tokens: 200,
+      }),
+      signal: AbortSignal.timeout(15000), // 15秒超时
+    });
+
+    if (!response.ok) {
+      const errorText = await response.text();
+      console.error('LLM API 错误:', response.status, errorText);
+      return null;
+    }
+
+    const data = await response.json();
+    const assistantMessage = data.choices?.[0]?.message?.content;
+
+    if (!assistantMessage) {
+      console.error('LLM 返回内容为空');
+      return null;
+    }
+
+    // 解析 JSON 响应
+    const jsonMatch = assistantMessage.match(/\{[\s\S]*\}/);
+    if (!jsonMatch) {
+      console.error('LLM 返回格式异常:', assistantMessage);
+      return null;
+    }
+
+    const result = JSON.parse(jsonMatch[0]);
+
+    if (result.amount === null || result.amount === undefined) {
+      return null;
+    }
+
+    // 验证金额合理性
+    const amount = parseFloat(result.amount);
+    if (isNaN(amount) || amount < 0.01 || amount > 100000000) {
+      console.error('LLM 提取的金额不合理:', result.amount);
+      return null;
+    }
+
+    console.log(`LLM 提取金额成功: ${amount} 万元`);
+
+    return {
+      amount: amount,
+      unit: '万元',
+      text: result.text || `${amount}万元`,
+      source: 'llm', // 标记来源
+    };
+  } catch (err) {
+    if (err.name === 'TimeoutError') {
+      console.error('LLM API 超时');
+    } else {
+      console.error('LLM 提取金额失败:', err.message);
+    }
+    return null;
+  }
+}
+
+// 测试 LLM 连接
+export async function testLLMConnection() {
+  const config = getLLMConfig();
+
+  if (!config || !config.apiKey) {
+    return { success: false, error: '未配置 API Key' };
+  }
+
+  try {
+    const response = await fetch(`${config.baseUrl}/chat/completions`, {
+      method: 'POST',
+      headers: {
+        'Content-Type': 'application/json',
+        'Authorization': `Bearer ${config.apiKey}`,
+      },
+      body: JSON.stringify({
+        model: config.model || 'qwen-turbo',
+        messages: [
+          {
+            role: 'user',
+            content: '请回复"连接成功"',
+          },
+        ],
+        max_tokens: 10,
+      }),
+      signal: AbortSignal.timeout(10000),
+    });
+
+    if (!response.ok) {
+      const errorText = await response.text();
+      return { success: false, error: `API 错误: ${response.status} - ${errorText}` };
+    }
+
+    const data = await response.json();
+    const reply = data.choices?.[0]?.message?.content;
+
+    return {
+      success: true,
+      message: '连接成功',
+      model: config.model || 'qwen-turbo',
+      reply: reply,
+    };
+  } catch (err) {
+    if (err.name === 'TimeoutError') {
+      return { success: false, error: '连接超时' };
+    }
+    return { success: false, error: err.message };
+  }
+}
+
+// 获取 LLM 状态
+export function getLLMStatus() {
+  const config = getLLMConfig();
+
+  if (!config) {
+    return {
+      configured: false,
+      enabled: false,
+      model: null,
+    };
+  }
+
+  return {
+    configured: !!config.apiKey,
+    enabled: config.enabled && !!config.apiKey,
+    model: config.model || 'qwen-turbo',
+    baseUrl: config.baseUrl,
+  };
+}
--- a/src/server.js
+++ b/src/server.js
@@ -6,6 +6,7 @@ import * as cheerio from 'cheerio';
 import iconv from 'iconv-lite';
 import { sendReportEmail } from './emailService.js';
 import { initScheduler, runTaskNow, reloadScheduler, getSchedulerStatus } from './scheduler.js';
+import { extractBudgetWithLLM, testLLMConnection, getLLMStatus, isLLMEnabled } from './llmService.js';

 const app = express();
 const PORT = process.env.PORT || 5000;
@@ -321,7 +322,7 @@ function parseDetail(html) {
  };
 }

-// 增强版parseDetail，支持PDF解析
+// 增强版parseDetail，支持PDF解析和LLM金额提取
 async function parseDetailEnhanced(html, pageUrl) {
  const $ = cheerio.load(html);

@@ -359,8 +360,25 @@ async function parseDetailEnhanced(html, pageUrl) {
    content = htmlDetail.content;
  }

-  // 使用现有的extractBudget函数提取金额
-  const budget = extractBudget(content);
+  // 提取金额：优先使用 LLM，失败则降级到正则表达式
+  let budget = null;
+  if (isLLMEnabled()) {
+    console.log('使用 LLM 提取金额...');
+    budget = await extractBudgetWithLLM(content);
+    if (budget) {
+      console.log(`LLM 提取成功: ${budget.amount} ${budget.unit}`);
+    } else {
+      console.log('LLM 提取失败，降级到正则表达式');
+    }
+  }
+
+  // 如果 LLM 未启用或提取失败，使用正则表达式
+  if (!budget) {
+    budget = extractBudget(content);
+    if (budget) {
+      budget.source = 'regex'; // 标记来源
+    }
+  }

  // 获取其他基本信息（标题、发布时间等）
  const basicInfo = parseDetail(html);
@@ -749,10 +767,13 @@ app.get('/api/config', async (req, res) => {
    const configContent = readFileSync(configPath, 'utf-8');
    const config = JSON.parse(configContent);

-    // 不返回敏感信息(密码)
+    // 不返回敏感信息(密码和API Key)
    if (config.email && config.email.smtpPass) {
      config.email.smtpPass = '***已配置***';
    }
+    if (config.llm && config.llm.apiKey) {
+      config.llm.apiKey = '***已配置***';
+    }

    res.json({ success: true, data: config });
  } catch (error) {
@@ -774,11 +795,18 @@ app.post('/api/config', async (req, res) => {

    const newConfig = req.body;

+    // 读取旧配置以保留敏感信息
+    const oldConfigContent = readFileSync(configPath, 'utf-8');
+    const oldConfig = JSON.parse(oldConfigContent);
+
    // 如果密码字段是占位符,保留原密码
    if (newConfig.email && newConfig.email.smtpPass === '***已配置***') {
-      const oldConfigContent = readFileSync(configPath, 'utf-8');
-      const oldConfig = JSON.parse(oldConfigContent);
-      newConfig.email.smtpPass = oldConfig.email.smtpPass;
+      newConfig.email.smtpPass = oldConfig.email?.smtpPass || '';
+    }
+
+    // 如果 LLM API Key 是占位符,保留原 API Key
+    if (newConfig.llm && newConfig.llm.apiKey === '***已配置***') {
+      newConfig.llm.apiKey = oldConfig.llm?.apiKey || '';
    }

    // 保存配置
@@ -793,6 +821,26 @@ app.post('/api/config', async (req, res) => {
  }
 });

+// LLM 状态接口
+app.get('/api/llm/status', async (req, res) => {
+  try {
+    const status = getLLMStatus();
+    res.json({ success: true, data: status });
+  } catch (error) {
+    res.status(500).json({ success: false, error: error.message });
+  }
+});
+
+// LLM 连接测试接口
+app.post('/api/llm/test', async (req, res) => {
+  try {
+    const result = await testLLMConnection();
+    res.json({ success: result.success, data: result });
+  } catch (error) {
+    res.status(500).json({ success: false, error: error.message });
+  }
+});
+
 // 获取定时任务状态
 app.get('/api/scheduler/status', async (req, res) => {
  try {