refactor: migrate workflow to v2 core and archive legacy modules
This commit is contained in:
159
legacy/intent_analyzer.py
Normal file
159
legacy/intent_analyzer.py
Normal file
@@ -0,0 +1,159 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
语义匹配 - 用 embedding 做意图/情绪识别
|
||||
配置 EMBEDDING_MODEL 后启用,否则回退到关键词
|
||||
"""
|
||||
import os
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
|
||||
logger = logging.getLogger(__name__)

# Reference sentence per intent label. A user message is embedded and compared
# (cosine similarity) against the embedding of each sentence below.
# Insertion order is significant: it is the order templates are scored in.
INTENT_TEMPLATES = {
    "询价": "我想问一下价格多少钱",      # price inquiry
    "发图": "我发图给你看看",            # customer is sending an image
    "砍价": "能不能便宜点太贵了",        # bargaining
    "批量": "我要做很多张图批量",        # bulk order
    "加急": "能不能快点很急",            # rush request
    "售后": "已经付款了什么时候好",      # after-sales / order status
    "修改": "不满意要改一下",            # revision request
    "转接": "我要退款投诉",              # refund / complaint -> hand off
    "打招呼": "你好在吗有人吗",          # greeting
}

# Reference sentence per emotion label, matched the same way as intents.
EMOTION_TEMPLATES = {
    "平静": "好的谢谢",                  # calm
    "着急": "快点啊很急",                # in a hurry
    "不满": "怎么这么慢不满意",          # dissatisfied
    "砍价": "太贵了便宜点",              # bargaining
}

# Cache of template embeddings, keyed by the cache_key passed to _get_embedding
# (e.g. "intent_<label>" / "emotion_<label>").
_template_embeddings: dict = {}
|
||||
|
||||
|
||||
@dataclass
class IntentDecision:
    """Outcome of one intent-detection attempt.

    A default-constructed instance (empty intent, source "none", score 0.0)
    means no intent was recognised.
    """

    # Detected intent label; empty string when nothing matched.
    intent: str = ""
    # Which detector produced the label: "embedding", "keyword" or "none".
    source: str = "none"
    # Similarity score of the match; stays 0.0 for keyword and empty results.
    score: float = 0.0
|
||||
|
||||
def _get_embedding(text: str, cache_key: str = None) -> Optional[list]:
|
||||
"""调用 embedding API,失败返回 None。cache_key 用于缓存模板向量"""
|
||||
model = os.getenv("EMBEDDING_MODEL", "")
|
||||
if not model:
|
||||
return None
|
||||
if cache_key and cache_key in _template_embeddings:
|
||||
return _template_embeddings[cache_key]
|
||||
try:
|
||||
from openai import OpenAI
|
||||
client = OpenAI(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
base_url=os.getenv("OPENAI_BASE_URL"),
|
||||
)
|
||||
resp = client.embeddings.create(model=model, input=text[:2000])
|
||||
emb = resp.data[0].embedding
|
||||
if cache_key:
|
||||
_template_embeddings[cache_key] = emb
|
||||
return emb
|
||||
except Exception as e:
|
||||
logger.debug(f"embedding 失败: {e}")
|
||||
return None
|
||||
|
||||
|
||||
def _cosine_sim(a: list, b: list) -> float:
|
||||
if not a or not b or len(a) != len(b):
|
||||
return 0.0
|
||||
dot = sum(x * y for x, y in zip(a, b))
|
||||
na = sum(x * x for x in a) ** 0.5
|
||||
nb = sum(y * y for y in b) ** 0.5
|
||||
if na == 0 or nb == 0:
|
||||
return 0.0
|
||||
return dot / (na * nb)
|
||||
|
||||
|
||||
def detect_intent_embedding(msg: str) -> Optional[str]:
    """Return the embedding-detected intent label for ``msg``, or None.

    Convenience wrapper around ``detect_intent_embedding_decision`` that drops
    the score and maps an empty intent to None.
    """
    label = detect_intent_embedding_decision(msg).intent
    return label if label else None
|
||||
|
||||
|
||||
def detect_intent_embedding_decision(msg: str) -> IntentDecision:
    """Classify ``msg`` by embedding similarity against ``INTENT_TEMPLATES``.

    Returns an ``IntentDecision`` with source "embedding" and the winning
    cosine similarity when the best template scores above 0.6. Otherwise
    (no embedding available, or no template above the threshold) an empty
    ``IntentDecision`` is returned.
    """
    query_vec = _get_embedding(msg)
    if not query_vec:
        return IntentDecision()

    top_label = ""
    top_sim = 0.0
    for label in INTENT_TEMPLATES:
        # Template vectors are cached under a per-intent key.
        ref_vec = _get_embedding(INTENT_TEMPLATES[label], cache_key=f"intent_{label}")
        if ref_vec:
            candidate = _cosine_sim(query_vec, ref_vec)
            if candidate > top_sim:
                top_label, top_sim = label, candidate

    # Accept the best match only when it clears the confidence threshold.
    if top_sim > 0.6:
        return IntentDecision(intent=top_label, source="embedding", score=float(top_sim))
    return IntentDecision()
|
||||
|
||||
|
||||
def detect_emotion_embedding(msg: str) -> Optional[str]:
    """Classify the emotion of ``msg`` by embedding similarity.

    Compares the message embedding with each entry of ``EMOTION_TEMPLATES``
    and returns the best-scoring label when its cosine similarity exceeds
    0.55. Returns None when no embedding is available or nothing clears the
    threshold.
    """
    query_vec = _get_embedding(msg)
    if not query_vec:
        return None

    top_label = ""
    top_sim = 0.0
    for label in EMOTION_TEMPLATES:
        # Template vectors are cached under a per-emotion key.
        ref_vec = _get_embedding(EMOTION_TEMPLATES[label], cache_key=f"emotion_{label}")
        if ref_vec:
            candidate = _cosine_sim(query_vec, ref_vec)
            if candidate > top_sim:
                top_label, top_sim = label, candidate

    if top_sim > 0.55:
        return top_label
    return None
|
||||
|
||||
|
||||
# Keyword rules in priority order: the first rule with any keyword contained
# in the message wins, so refund/complaint handoff outranks everything else.
_KEYWORD_RULES = (
    ("转接", ("退款", "退货", "投诉")),
    ("批量", ("多张", "批量", "很多", "几十张")),
    ("加急", ("快点", "加急", "很急", "着急")),
    ("砍价", ("便宜", "贵", "少点", "打折")),
    ("修改", ("改", "修改", "不满意")),
    ("询价", ("多少钱", "价格", "报价", "多钱", "收费", "怎么收费", "咋收费")),
    ("打招呼", ("在吗", "你好", "有人")),
)


def detect_intent_keywords(msg: str) -> str:
    """Keyword fallback used when embedding detection yields nothing.

    The message is stripped and lower-cased, then checked against each rule in
    ``_KEYWORD_RULES`` by substring match. Returns the label of the first
    matching rule, or "" when no keyword is found (including for empty/None
    input).
    """
    text = (msg or "").strip().lower()
    for label, needles in _KEYWORD_RULES:
        for needle in needles:
            if needle in text:
                return label
    return ""
|
||||
|
||||
|
||||
def detect_intent(msg: str) -> IntentDecision:
    """Detect the intent of ``msg``: embedding first, keyword rules as fallback.

    1. Run embedding-based detection (which yields nothing when no embedding
       model is configured).
    2. If that fails or finds no confident match, fall back to keyword rules.

    Args:
        msg: Raw user message; None and whitespace-only input are treated as
            empty.

    Returns:
        An ``IntentDecision`` whose ``source`` is "embedding" or "keyword" for
        a match, or a default (empty) ``IntentDecision`` when the message is
        empty or nothing matched. Keyword matches carry a score of 0.0.
    """
    text = (msg or "").strip()
    if not text:
        return IntentDecision()

    try:
        emb_decision = detect_intent_embedding_decision(text)
    except Exception:
        # Stay best-effort, but leave a trace: an exception here is unexpected
        # and was previously discarded without any record.
        logger.warning(
            "embedding intent detection failed; falling back to keywords",
            exc_info=True,
        )
        emb_decision = IntentDecision()
    if emb_decision.intent:
        return emb_decision

    kw_intent = detect_intent_keywords(text)
    if kw_intent:
        return IntentDecision(intent=kw_intent, source="keyword", score=0.0)
    return IntentDecision()
|
||||
|
||||
Reference in New Issue
Block a user