feat: ai-first intent detection with keyword fallback

This commit is contained in:
2026-03-01 17:09:05 +08:00
parent 4a07f9c726
commit 00c80c3bec
5 changed files with 117 additions and 24 deletions

View File

@@ -148,9 +148,8 @@ def is_batch_finish_intent(text: str, state: Any, has_incoming_urls: bool) -> bo
return False
try:
from utils.intent_analyzer import detect_intent_embedding, detect_intent_keywords
intent = detect_intent_embedding(text) or detect_intent_keywords(text)
from utils.intent_analyzer import detect_intent
intent = detect_intent(text).intent
except Exception:
intent = ""
if intent in ("询价", "砍价"):

View File

@@ -210,15 +210,16 @@ def get_conversation_context(customer_id: str, acc_id: str = "", limit: int = 12
def get_intent_emotion_hint(msg: str) -> str:
"""语义匹配:意图/情绪识别注入提示。EMBEDDING_MODEL 未配置时用关键词。"""
try:
from utils.intent_analyzer import detect_emotion_embedding, detect_intent_embedding, detect_intent_keywords
from utils.intent_analyzer import detect_emotion_embedding, detect_intent
intent = detect_intent_embedding(msg)
if not intent:
intent = detect_intent_keywords(msg)
decision = detect_intent(msg)
intent = decision.intent
emotion = detect_emotion_embedding(msg) if os.getenv("EMBEDDING_MODEL") else None
parts = []
if intent:
parts.append(f"意图:{intent}")
if decision.source:
parts.append(f"意图来源:{decision.source}")
if emotion:
parts.append(f"情绪:{emotion}")
if parts:

View File

@@ -396,7 +396,14 @@ class QingjianAPIClient:
elif self._is_shop_card(data):
# 进店卡片:有历史对话就不回复,没有才打招呼(Gemini 已在上面统一跳过)
cid = data.get('from_id', '')
if self._has_chat_history(cid):
acc_id = data.get('acc_id', '')
residual_text = self._extract_customer_text_from_shop_card_msg(data.get('msg', ''))
if residual_text:
logger.info(f"[{self.get_time()}] 进店卡片携带客户文本,转普通消息处理: {residual_text}")
patched = dict(data)
patched['msg'] = residual_text
await self._debounce_agent_reply(patched)
elif self._has_chat_history(cid, acc_id=acc_id):
logger.info(f"[{self.get_time()}] 进店卡片(已有记录),跳过")
else:
logger.info(f"[{self.get_time()}] 进店卡片(新客户),发送问候")
@@ -505,14 +512,23 @@ class QingjianAPIClient:
if self._msg_has_image_url(text):
return "image"
try:
from utils.intent_analyzer import detect_intent_keywords
intent = detect_intent_keywords(text)
from utils.intent_analyzer import detect_intent
decision = detect_intent(text)
intent = decision.intent
if intent:
self._activity_log(
"debounce_intent_detected",
intent=intent,
source=decision.source,
score=round(float(decision.score or 0.0), 4),
msg=text[:120],
)
except Exception:
intent = ""
if intent:
return intent
lower = text.lower()
if any(k in lower for k in ["报价", "多少钱", "价格", "", "优惠"]):
if any(k in lower for k in ["报价", "多少钱", "价格", "", "优惠", "收费", "怎么收费", "咋收费"]):
return "询价"
if any(k in lower for k in ["做一下", "改一下", "需求", "门头", "上面的字", "处理"]):
return "修改"
@@ -1222,17 +1238,31 @@ class QingjianAPIClient:
msg = self.to_chinese(data.get('msg', ''))
return msg.startswith('[进店卡片]') or '我想咨询你们店的这个商品' in msg
def _has_chat_history(self, customer_id: str) -> bool:
"""判断该客户是否已有聊天记录(内存历史或数据库均可)"""
def _extract_customer_text_from_shop_card_msg(self, msg: str) -> str:
    """Extract the customer's own text from a "shop card + text" mixed message.

    The message is first passed through ``self.to_chinese`` and trimmed.
    It is then split on the ``#*#`` separator; fragments that start with
    the shop-card marker or contain the canned "I want to ask about this
    product" sentence are discarded, and the remaining fragments are
    joined with single spaces.

    When no fragment survives, the card marker (up to the end of its
    line) and the canned sentence are removed from the whole text and
    whatever is left is returned.

    Args:
        msg: Raw message text; ``None`` or empty is treated as "".

    Returns:
        The residual customer text, or "" when there is none.
    """
    text = self.to_chinese(msg or "").strip()
    if not text:
        return ""

    card_prefix = "[进店卡片]"
    card_phrase = "我想咨询你们店的这个商品"

    # Keep only the non-empty fragments that are not card boilerplate.
    kept = [
        part
        for part in (raw.strip() for raw in text.split("#*#"))
        if part
        and not part.startswith(card_prefix)
        and card_phrase not in part
    ]
    if kept:
        return " ".join(kept).strip()

    # Fallback: no separator-delimited fragment survived, so cut the
    # boilerplate out of the full text and trim leftover punctuation and
    # separator characters from both ends.
    stripped = re.sub(r"\[进店卡片\][^\n\r]*", "", text).strip()
    return stripped.replace(card_phrase, "").strip(",。,#* ")
def _has_chat_history(self, customer_id: str, acc_id: str = "") -> bool:
    """Return whether this customer already has chat history in the current shop.

    The in-memory histories on ``self.agent.message_histories`` are
    checked first. If nothing is found there, the chat-log database is
    queried for a single record scoped by customer and ``acc_id``.

    Args:
        customer_id: Customer identifier; an empty value yields False.
        acc_id: Shop/account identifier forwarded to the database query.

    Returns:
        True when a non-empty in-memory history exists or the database
        returns at least one record; False otherwise, including when the
        database lookup raises.
    """
    if not customer_id:
        return False

    # Check the in-memory conversation history first (fastest path).
    histories = self.agent.message_histories
    if customer_id in histories and histories[customer_id]:
        return True

    # Query by shop + customer so history from another shop does not
    # cause this customer to be skipped by mistake.
    try:
        from db.chat_log_db import get_recent_conversation
        msgs = get_recent_conversation(customer_id, acc_id=acc_id, limit=1)
        return len(msgs) > 0
    except Exception:
        # Deliberate best-effort: a failed lookup is treated as "no history".
        return False

View File

@@ -0,0 +1,24 @@
import unittest
from utils.intent_analyzer import detect_intent
class IntentAnalyzerTests(unittest.TestCase):
    """Checks ``detect_intent`` on the keyword-fallback and no-match paths."""

    def setUp(self):
        # The assertions below expect ``source == "keyword"``, so the
        # embedding path must not run. Clear EMBEDDING_MODEL for the
        # duration of each test and restore the environment afterwards.
        # NOTE(review): presumably an unset EMBEDDING_MODEL disables the
        # embedding lookup -- confirm against _get_embedding.
        import os
        from unittest import mock

        env_patch = mock.patch.dict(os.environ)
        env_patch.start()
        self.addCleanup(env_patch.stop)
        os.environ.pop("EMBEDDING_MODEL", None)

    def test_keyword_fallback_for_price(self):
        # A pricing question is expected to resolve via the keyword rules.
        d = detect_intent("这个怎么收费")
        self.assertEqual(d.intent, "询价")
        self.assertEqual(d.source, "keyword")

    def test_keyword_fallback_for_greeting(self):
        # A greeting is expected to resolve via the keyword rules.
        d = detect_intent("你好 在吗")
        self.assertEqual(d.intent, "打招呼")
        self.assertEqual(d.source, "keyword")

    def test_unknown_intent(self):
        # Text matching no rule yields an empty intent and no source.
        d = detect_intent("abc123")
        self.assertEqual(d.intent, "")
        self.assertIn(d.source, ("none", ""))


if __name__ == "__main__":
    unittest.main()

View File

@@ -5,7 +5,8 @@
"""
import os
import logging
from typing import Optional, Tuple
from dataclasses import dataclass
from typing import Optional
logger = logging.getLogger(__name__)
@@ -31,6 +32,13 @@ EMOTION_TEMPLATES = {
_template_embeddings: dict = {}
@dataclass
class IntentDecision:
    """Result of one intent-detection attempt.

    A default-constructed instance means "no intent detected".
    """

    # Detected intent label; empty string when nothing matched.
    intent: str = ""
    # Which detector produced the label: "embedding", "keyword" or "none".
    source: str = "none"
    # Score attached by the detector; stays 0.0 for keyword and no-match results.
    score: float = 0.0
def _get_embedding(text: str, cache_key: str = None) -> Optional[list]:
"""调用 embedding API失败返回 None。cache_key 用于缓存模板向量"""
model = os.getenv("EMBEDDING_MODEL", "")
@@ -66,10 +74,16 @@ def _cosine_sim(a: list, b: list) -> float:
def detect_intent_embedding(msg: str) -> Optional[str]:
    """Detect the intent of *msg* using embeddings.

    Thin wrapper around ``detect_intent_embedding_decision`` that exposes
    only the intent label.

    Returns:
        The intent label, or None when the decision carries no intent
        (embedding not configured, lookup failed, or no confident match).
    """
    decision = detect_intent_embedding_decision(msg)
    return decision.intent or None
def detect_intent_embedding_decision(msg: str) -> IntentDecision:
"""返回 embedding 意图决策(含分值)。"""
msg_emb = _get_embedding(msg)
if not msg_emb:
return None
return IntentDecision()
best_intent, best_score = "", 0.0
for intent, template in INTENT_TEMPLATES.items():
tpl_emb = _get_embedding(template, cache_key=f"intent_{intent}")
@@ -79,7 +93,9 @@ def detect_intent_embedding(msg: str) -> Optional[str]:
if sim > best_score:
best_score = sim
best_intent = intent
return best_intent if best_score > 0.6 else None
if best_score > 0.6:
return IntentDecision(intent=best_intent, source="embedding", score=float(best_score))
return IntentDecision()
def detect_emotion_embedding(msg: str) -> Optional[str]:
@@ -112,9 +128,32 @@ def detect_intent_keywords(msg: str) -> str:
return "砍价"
if any(k in m for k in ["", "修改", "不满意"]):
return "修改"
if any(k in m for k in ["多少钱", "价格", "报价", "多钱"]):
if any(k in m for k in ["多少钱", "价格", "报价", "多钱", "收费", "怎么收费", "咋收费"]):
return "询价"
if any(k in m for k in ["在吗", "你好", "有人"]):
return "打招呼"
return ""
def detect_intent(msg: str) -> IntentDecision:
    """Detect the intent of *msg*: embedding first, keyword rules as fallback.

    1. The embedding detector is tried first; if it yields an intent,
       that decision is returned as-is.
    2. If it raises or yields no intent, the keyword rules are applied
       and a match is returned with ``source="keyword"`` and score 0.0.

    Args:
        msg: Incoming message text; ``None``, empty or whitespace-only
            input yields an empty decision.

    Returns:
        An ``IntentDecision``; a default (empty) one when nothing matched.
    """
    text = (msg or "").strip()
    if not text:
        return IntentDecision()

    try:
        emb_decision = detect_intent_embedding_decision(text)
    except Exception as exc:
        # Best-effort: an embedding failure must not block the keyword
        # fallback, but it should leave a trace instead of vanishing.
        logger.warning(
            "embedding intent detection failed, falling back to keywords: %s",
            exc,
        )
        emb_decision = IntentDecision()
    if emb_decision.intent:
        return emb_decision

    kw_intent = detect_intent_keywords(text)
    if kw_intent:
        return IntentDecision(intent=kw_intent, source="keyword", score=0.0)
    return IntentDecision()