refactor: migrate workflow to v2 core and archive legacy modules

2026-03-04 21:52:24 +08:00
parent e1ce17f2aa
commit fa61b11b02
156 changed files with 1781 additions and 2066 deletions
--- a/legacy/intent_analyzer.py
+++ b/legacy/intent_analyzer.py
@@ -0,0 +1,159 @@
+# -*- coding: utf-8 -*-
+"""
+语义匹配 - 用 embedding 做意图/情绪识别
+配置 EMBEDDING_MODEL 后启用，否则回退到关键词
+"""
+import os
+import logging
+from dataclasses import dataclass
+from typing import Optional
+
+logger = logging.getLogger(__name__)
+
+# Intent templates used for embedding-similarity matching.
+# Key: intent label returned to callers. Value: a sample utterance whose
+# embedding is compared (cosine similarity) against the incoming message.
+INTENT_TEMPLATES = {
+    "询价": "我想问一下价格多少钱",
+    "发图": "我发图给你看看",
+    "砍价": "能不能便宜点太贵了",
+    "批量": "我要做很多张图批量",
+    "加急": "能不能快点很急",
+    "售后": "已经付款了什么时候好",
+    "修改": "不满意要改一下",
+    "转接": "我要退款投诉",
+    "打招呼": "你好在吗有人吗",
+}
+# Emotion templates, same layout as INTENT_TEMPLATES: emotion label -> sample
+# utterance. Note that "砍价" is used as a label in both tables.
+EMOTION_TEMPLATES = {
+    "平静": "好的谢谢",
+    "着急": "快点啊很急",
+    "不满": "怎么这么慢不满意",
+    "砍价": "太贵了便宜点",
+}
+
+
+# Process-wide cache of template embeddings, filled by _get_embedding().
+# Keys are the cache_key strings passed by callers ("intent_<label>" /
+# "emotion_<label>"); values are the embedding vectors returned by the API.
+# Nothing in this module evicts or invalidates entries.
+_template_embeddings: dict = {}
+
+
+@dataclass
+class IntentDecision:
+    """Outcome of an intent lookup, including where the answer came from.
+
+    A default-constructed instance (empty ``intent``, ``source="none"``,
+    ``score=0.0``) means no intent was recognised.
+    """
+
+    # Intent label (a key of INTENT_TEMPLATES or a keyword-rule label);
+    # empty string when nothing matched.
+    intent: str = ""
+    # Which strategy produced the label.
+    source: str = "none"  # embedding / keyword / none
+    # Cosine similarity of the winning template for embedding matches;
+    # keyword matches and "no match" results leave it at 0.0.
+    score: float = 0.0
+
+def _get_embedding(text: str, cache_key: Optional[str] = None) -> Optional[list]:
+    """Fetch the embedding vector for ``text`` from the embeddings API.
+
+    Embedding support is opt-in: when the ``EMBEDDING_MODEL`` environment
+    variable is unset or empty the function returns ``None`` immediately so
+    callers can fall back to keyword rules.
+
+    Args:
+        text: Text to embed. Only the first 2000 characters are sent.
+        cache_key: Optional key into the module-level template cache. When
+            given and already cached, the cached vector is returned without
+            calling the API; otherwise a successful result is stored under
+            this key.
+
+    Returns:
+        The embedding vector, or ``None`` when embeddings are disabled, the
+        text is empty, or the request fails for any reason.
+    """
+    model = os.getenv("EMBEDDING_MODEL", "")
+    if not model:
+        return None
+    if cache_key and cache_key in _template_embeddings:
+        return _template_embeddings[cache_key]
+    if not text:
+        # Nothing to embed; skip a network round trip that cannot produce a
+        # meaningful vector.
+        return None
+    try:
+        # Imported lazily so this module still loads (and the keyword
+        # fallback still works) when the openai package is not installed;
+        # an ImportError is handled like any other failure below.
+        from openai import OpenAI
+
+        client = OpenAI(
+            api_key=os.getenv("OPENAI_API_KEY"),
+            base_url=os.getenv("OPENAI_BASE_URL"),
+        )
+        # Input is capped at 2000 characters (presumably to stay within the
+        # model's input limit - confirm against the deployed model).
+        resp = client.embeddings.create(model=model, input=text[:2000])
+        emb = resp.data[0].embedding
+    except Exception as e:
+        # Best effort by design: any failure degrades to "no embedding" so
+        # callers can fall back. Lazy %-formatting avoids building the
+        # message when DEBUG logging is off.
+        logger.debug("embedding 失败: %s", e)
+        return None
+    if cache_key:
+        _template_embeddings[cache_key] = emb
+    return emb
+
+
+def _cosine_sim(a: list, b: list) -> float:
+    """Return the cosine similarity of two equal-length numeric vectors.
+
+    Yields 0.0 instead of raising when either vector is empty/None, when the
+    lengths differ, or when either vector has zero magnitude.
+    """
+    if not (a and b):
+        return 0.0
+    if len(a) != len(b):
+        return 0.0
+
+    norm_a = sum(v * v for v in a) ** 0.5
+    norm_b = sum(v * v for v in b) ** 0.5
+    if norm_a == 0 or norm_b == 0:
+        return 0.0
+
+    inner = sum(p * q for p, q in zip(a, b))
+    return inner / (norm_a * norm_b)
+
+
+def detect_intent_embedding(msg: str) -> Optional[str]:
+    """Return only the intent label from the embedding-based decision.
+
+    Gives ``None`` when the decision carries no intent (embeddings disabled,
+    request failed, or no template was similar enough).
+    """
+    label = detect_intent_embedding_decision(msg).intent
+    return label if label else None
+
+
+def detect_intent_embedding_decision(msg: str) -> IntentDecision:
+    """Pick the intent whose template sentence is most similar to ``msg``.
+
+    The message and each entry of INTENT_TEMPLATES are embedded (template
+    vectors are cached under ``intent_<label>``) and compared by cosine
+    similarity. The best label is reported only when its similarity exceeds
+    0.6; in every other case an empty ``IntentDecision`` is returned.
+    """
+    query_vec = _get_embedding(msg)
+    if not query_vec:
+        return IntentDecision()
+
+    winner, top_sim = "", 0.0
+    for intent, sentence in INTENT_TEMPLATES.items():
+        ref_vec = _get_embedding(sentence, cache_key=f"intent_{intent}")
+        if ref_vec:
+            candidate = _cosine_sim(query_vec, ref_vec)
+            # Strict comparison: on a tie the earlier template wins.
+            if candidate > top_sim:
+                winner, top_sim = intent, candidate
+
+    # Similarities at or below 0.6 count as "no confident match".
+    if top_sim <= 0.6:
+        return IntentDecision()
+    return IntentDecision(intent=winner, source="embedding", score=float(top_sim))
+
+
+def detect_emotion_embedding(msg: str) -> Optional[str]:
+    """Return the emotion label whose template is most similar to ``msg``.
+
+    Each entry of EMOTION_TEMPLATES is embedded (cached under
+    ``emotion_<label>``) and compared with the message embedding by cosine
+    similarity. Returns ``None`` when the message cannot be embedded or the
+    best similarity does not exceed 0.55.
+    """
+    query_vec = _get_embedding(msg)
+    if not query_vec:
+        return None
+
+    winner, top_sim = "", 0.0
+    for emotion, sentence in EMOTION_TEMPLATES.items():
+        ref_vec = _get_embedding(sentence, cache_key=f"emotion_{emotion}")
+        if ref_vec:
+            candidate = _cosine_sim(query_vec, ref_vec)
+            # Strict comparison: on a tie the earlier template wins.
+            if candidate > top_sim:
+                winner, top_sim = emotion, candidate
+
+    if top_sim > 0.55:
+        return winner
+    return None
+
+
+def detect_intent_keywords(msg: str) -> str:
+    """Keyword fallback used when no embedding-based intent is available.
+
+    The message is stripped and lower-cased, then checked against the rule
+    table below from top to bottom; the first rule with any keyword that
+    occurs as a substring decides the intent. Returns "" when no rule
+    matches (including for empty or ``None`` input).
+    """
+    # Order matters: earlier rules take precedence over later ones.
+    rules = (
+        ("转接", ("退款", "退货", "投诉")),
+        ("批量", ("多张", "批量", "很多", "几十张")),
+        ("加急", ("快点", "加急", "很急", "着急")),
+        ("砍价", ("便宜", "贵", "少点", "打折")),
+        ("修改", ("改", "修改", "不满意")),
+        ("询价", ("多少钱", "价格", "报价", "多钱", "收费", "怎么收费", "咋收费")),
+        ("打招呼", ("在吗", "你好", "有人")),
+    )
+    lowered = (msg or "").strip().lower()
+    for label, needles in rules:
+        for needle in needles:
+            if needle in lowered:
+                return label
+    return ""
+
+
+def detect_intent(msg: str) -> IntentDecision:
+    """Classify ``msg``, preferring embeddings and falling back to keywords.
+
+    1) Embedding matching is tried first; it yields a result only when
+       embeddings are configured and a template is similar enough.
+    2) If that fails, raises, or is not confident, keyword rules decide.
+
+    Args:
+        msg: Raw user message; ``None`` and whitespace-only input are
+            treated as empty.
+
+    Returns:
+        An ``IntentDecision`` whose ``source`` is "embedding" or "keyword"
+        on a match, or a default (empty) decision when nothing matched.
+    """
+    text = (msg or "").strip()
+    if not text:
+        return IntentDecision()
+
+    try:
+        emb_decision = detect_intent_embedding_decision(text)
+    except Exception:
+        # Still fall back to keywords, but record the failure with its
+        # traceback instead of discarding it silently.
+        logger.warning("embedding 意图识别异常，回退关键词规则", exc_info=True)
+        emb_decision = IntentDecision()
+    if emb_decision.intent:
+        return emb_decision
+
+    kw_intent = detect_intent_keywords(text)
+    if kw_intent:
+        # Keyword hits carry no similarity score.
+        return IntentDecision(intent=kw_intent, source="keyword", score=0.0)
+    return IntentDecision()
+