Files
tw/utils/intent_analyzer.py

160 lines
5.0 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
"""
语义匹配 - 用 embedding 做意图/情绪识别
配置 EMBEDDING_MODEL 后启用,否则回退到关键词
"""
import os
import logging
from dataclasses import dataclass
from typing import Optional
logger = logging.getLogger(__name__)
# 意图模板(用于 embedding 相似度匹配)
INTENT_TEMPLATES = {
"询价": "我想问一下价格多少钱",
"发图": "我发图给你看看",
"砍价": "能不能便宜点太贵了",
"批量": "我要做很多张图批量",
"加急": "能不能快点很急",
"售后": "已经付款了什么时候好",
"修改": "不满意要改一下",
"转接": "我要退款投诉",
"打招呼": "你好在吗有人吗",
}
EMOTION_TEMPLATES = {
"平静": "好的谢谢",
"着急": "快点啊很急",
"不满": "怎么这么慢不满意",
"砍价": "太贵了便宜点",
}
_template_embeddings: dict = {}
@dataclass
class IntentDecision:
intent: str = ""
source: str = "none" # embedding / keyword / none
score: float = 0.0
def _get_embedding(text: str, cache_key: str = None) -> Optional[list]:
"""调用 embedding API失败返回 None。cache_key 用于缓存模板向量"""
model = os.getenv("EMBEDDING_MODEL", "")
if not model:
return None
if cache_key and cache_key in _template_embeddings:
return _template_embeddings[cache_key]
try:
from openai import OpenAI
client = OpenAI(
api_key=os.getenv("OPENAI_API_KEY"),
base_url=os.getenv("OPENAI_BASE_URL"),
)
resp = client.embeddings.create(model=model, input=text[:2000])
emb = resp.data[0].embedding
if cache_key:
_template_embeddings[cache_key] = emb
return emb
except Exception as e:
logger.debug(f"embedding 失败: {e}")
return None
def _cosine_sim(a: list, b: list) -> float:
if not a or not b or len(a) != len(b):
return 0.0
dot = sum(x * y for x, y in zip(a, b))
na = sum(x * x for x in a) ** 0.5
nb = sum(y * y for y in b) ** 0.5
if na == 0 or nb == 0:
return 0.0
return dot / (na * nb)
def detect_intent_embedding(msg: str) -> Optional[str]:
"""用 embedding 检测意图,未配置或失败返回 None。"""
decision = detect_intent_embedding_decision(msg)
return decision.intent or None
def detect_intent_embedding_decision(msg: str) -> IntentDecision:
"""返回 embedding 意图决策(含分值)。"""
msg_emb = _get_embedding(msg)
if not msg_emb:
return IntentDecision()
best_intent, best_score = "", 0.0
for intent, template in INTENT_TEMPLATES.items():
tpl_emb = _get_embedding(template, cache_key=f"intent_{intent}")
if not tpl_emb:
continue
sim = _cosine_sim(msg_emb, tpl_emb)
if sim > best_score:
best_score = sim
best_intent = intent
if best_score > 0.6:
return IntentDecision(intent=best_intent, source="embedding", score=float(best_score))
return IntentDecision()
def detect_emotion_embedding(msg: str) -> Optional[str]:
"""用 embedding 检测情绪"""
msg_emb = _get_embedding(msg)
if not msg_emb:
return None
best_emotion, best_score = "", 0.0
for emotion, template in EMOTION_TEMPLATES.items():
tpl_emb = _get_embedding(template, cache_key=f"emotion_{emotion}")
if not tpl_emb:
continue
sim = _cosine_sim(msg_emb, tpl_emb)
if sim > best_score:
best_score = sim
best_emotion = emotion
return best_emotion if best_score > 0.55 else None
def detect_intent_keywords(msg: str) -> str:
"""关键词回退:无 embedding 时使用"""
m = (msg or "").strip().lower()
if any(k in m for k in ["退款", "退货", "投诉"]):
return "转接"
if any(k in m for k in ["多张", "批量", "很多", "几十张"]):
return "批量"
if any(k in m for k in ["快点", "加急", "很急", "着急"]):
return "加急"
if any(k in m for k in ["便宜", "", "少点", "打折"]):
return "砍价"
if any(k in m for k in ["", "修改", "不满意"]):
return "修改"
if any(k in m for k in ["多少钱", "价格", "报价", "多钱", "收费", "怎么收费", "咋收费"]):
return "询价"
if any(k in m for k in ["在吗", "你好", "有人"]):
return "打招呼"
return ""
def detect_intent(msg: str) -> IntentDecision:
"""
AI 意图判定 + 规则兜底:
1) 有 embedding 配置时先走 embedding。
2) 失败/低置信时回退关键词规则。
"""
text = (msg or "").strip()
if not text:
return IntentDecision()
try:
emb_decision = detect_intent_embedding_decision(text)
except Exception:
emb_decision = IntentDecision()
if emb_decision.intent:
return emb_decision
kw_intent = detect_intent_keywords(text)
if kw_intent:
return IntentDecision(intent=kw_intent, source="keyword", score=0.0)
return IntentDecision()