feat: ai-first intent detection with keyword fallback
This commit is contained in:
@@ -148,9 +148,8 @@ def is_batch_finish_intent(text: str, state: Any, has_incoming_urls: bool) -> bo
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from utils.intent_analyzer import detect_intent_embedding, detect_intent_keywords
|
from utils.intent_analyzer import detect_intent
|
||||||
|
intent = detect_intent(text).intent
|
||||||
intent = detect_intent_embedding(text) or detect_intent_keywords(text)
|
|
||||||
except Exception:
|
except Exception:
|
||||||
intent = ""
|
intent = ""
|
||||||
if intent in ("询价", "砍价"):
|
if intent in ("询价", "砍价"):
|
||||||
|
|||||||
@@ -210,15 +210,16 @@ def get_conversation_context(customer_id: str, acc_id: str = "", limit: int = 12
|
|||||||
def get_intent_emotion_hint(msg: str) -> str:
|
def get_intent_emotion_hint(msg: str) -> str:
|
||||||
"""语义匹配:意图/情绪识别,注入提示。EMBEDDING_MODEL 未配置时用关键词。"""
|
"""语义匹配:意图/情绪识别,注入提示。EMBEDDING_MODEL 未配置时用关键词。"""
|
||||||
try:
|
try:
|
||||||
from utils.intent_analyzer import detect_emotion_embedding, detect_intent_embedding, detect_intent_keywords
|
from utils.intent_analyzer import detect_emotion_embedding, detect_intent
|
||||||
|
|
||||||
intent = detect_intent_embedding(msg)
|
decision = detect_intent(msg)
|
||||||
if not intent:
|
intent = decision.intent
|
||||||
intent = detect_intent_keywords(msg)
|
|
||||||
emotion = detect_emotion_embedding(msg) if os.getenv("EMBEDDING_MODEL") else None
|
emotion = detect_emotion_embedding(msg) if os.getenv("EMBEDDING_MODEL") else None
|
||||||
parts = []
|
parts = []
|
||||||
if intent:
|
if intent:
|
||||||
parts.append(f"意图:{intent}")
|
parts.append(f"意图:{intent}")
|
||||||
|
if decision.source:
|
||||||
|
parts.append(f"意图来源:{decision.source}")
|
||||||
if emotion:
|
if emotion:
|
||||||
parts.append(f"情绪:{emotion}")
|
parts.append(f"情绪:{emotion}")
|
||||||
if parts:
|
if parts:
|
||||||
|
|||||||
@@ -396,7 +396,14 @@ class QingjianAPIClient:
|
|||||||
elif self._is_shop_card(data):
|
elif self._is_shop_card(data):
|
||||||
# 进店卡片:有历史对话就不回复,没有才打招呼(Gemini 已在上面统一跳过)
|
# 进店卡片:有历史对话就不回复,没有才打招呼(Gemini 已在上面统一跳过)
|
||||||
cid = data.get('from_id', '')
|
cid = data.get('from_id', '')
|
||||||
if self._has_chat_history(cid):
|
acc_id = data.get('acc_id', '')
|
||||||
|
residual_text = self._extract_customer_text_from_shop_card_msg(data.get('msg', ''))
|
||||||
|
if residual_text:
|
||||||
|
logger.info(f"[{self.get_time()}] 进店卡片携带客户文本,转普通消息处理: {residual_text}")
|
||||||
|
patched = dict(data)
|
||||||
|
patched['msg'] = residual_text
|
||||||
|
await self._debounce_agent_reply(patched)
|
||||||
|
elif self._has_chat_history(cid, acc_id=acc_id):
|
||||||
logger.info(f"[{self.get_time()}] 进店卡片(已有记录),跳过")
|
logger.info(f"[{self.get_time()}] 进店卡片(已有记录),跳过")
|
||||||
else:
|
else:
|
||||||
logger.info(f"[{self.get_time()}] 进店卡片(新客户),发送问候")
|
logger.info(f"[{self.get_time()}] 进店卡片(新客户),发送问候")
|
||||||
@@ -505,14 +512,23 @@ class QingjianAPIClient:
|
|||||||
if self._msg_has_image_url(text):
|
if self._msg_has_image_url(text):
|
||||||
return "image"
|
return "image"
|
||||||
try:
|
try:
|
||||||
from utils.intent_analyzer import detect_intent_keywords
|
from utils.intent_analyzer import detect_intent
|
||||||
intent = detect_intent_keywords(text)
|
decision = detect_intent(text)
|
||||||
|
intent = decision.intent
|
||||||
|
if intent:
|
||||||
|
self._activity_log(
|
||||||
|
"debounce_intent_detected",
|
||||||
|
intent=intent,
|
||||||
|
source=decision.source,
|
||||||
|
score=round(float(decision.score or 0.0), 4),
|
||||||
|
msg=text[:120],
|
||||||
|
)
|
||||||
except Exception:
|
except Exception:
|
||||||
intent = ""
|
intent = ""
|
||||||
if intent:
|
if intent:
|
||||||
return intent
|
return intent
|
||||||
lower = text.lower()
|
lower = text.lower()
|
||||||
if any(k in lower for k in ["报价", "多少钱", "价格", "贵", "优惠"]):
|
if any(k in lower for k in ["报价", "多少钱", "价格", "贵", "优惠", "收费", "怎么收费", "咋收费"]):
|
||||||
return "询价"
|
return "询价"
|
||||||
if any(k in lower for k in ["做一下", "改一下", "需求", "门头", "上面的字", "处理"]):
|
if any(k in lower for k in ["做一下", "改一下", "需求", "门头", "上面的字", "处理"]):
|
||||||
return "修改"
|
return "修改"
|
||||||
@@ -1222,17 +1238,31 @@ class QingjianAPIClient:
|
|||||||
msg = self.to_chinese(data.get('msg', ''))
|
msg = self.to_chinese(data.get('msg', ''))
|
||||||
return msg.startswith('[进店卡片]') or '我想咨询你们店的这个商品' in msg
|
return msg.startswith('[进店卡片]') or '我想咨询你们店的这个商品' in msg
|
||||||
|
|
||||||
def _has_chat_history(self, customer_id: str) -> bool:
|
def _extract_customer_text_from_shop_card_msg(self, msg: str) -> str:
|
||||||
"""判断该客户是否已有聊天记录(内存历史或数据库均可)"""
|
"""从“进店卡片+文本”混合消息里提取客户真实文本。"""
|
||||||
|
text = self.to_chinese(msg or "").strip()
|
||||||
|
if not text:
|
||||||
|
return ""
|
||||||
|
parts = [p.strip() for p in text.split("#*#") if p and p.strip()]
|
||||||
|
kept = []
|
||||||
|
for part in parts:
|
||||||
|
if part.startswith("[进店卡片]") or "我想咨询你们店的这个商品" in part:
|
||||||
|
continue
|
||||||
|
kept.append(part)
|
||||||
|
if kept:
|
||||||
|
return " ".join(kept).strip()
|
||||||
|
stripped = re.sub(r"\[进店卡片\][^\n\r]*", "", text).strip()
|
||||||
|
stripped = stripped.replace("我想咨询你们店的这个商品", "").strip(",。,#* ")
|
||||||
|
return stripped
|
||||||
|
|
||||||
|
def _has_chat_history(self, customer_id: str, acc_id: str = "") -> bool:
|
||||||
|
"""判断该客户在当前店铺是否已有聊天记录。"""
|
||||||
if not customer_id:
|
if not customer_id:
|
||||||
return False
|
return False
|
||||||
# 先查内存对话历史(最快)
|
# 按店铺+客户查数据库,避免跨店串历史导致错误跳过。
|
||||||
if customer_id in self.agent.message_histories and self.agent.message_histories[customer_id]:
|
|
||||||
return True
|
|
||||||
# 再查数据库(重启后仍有记录)
|
|
||||||
try:
|
try:
|
||||||
from db.chat_log_db import get_conversation
|
from db.chat_log_db import get_recent_conversation
|
||||||
msgs = get_conversation(customer_id, limit=1)
|
msgs = get_recent_conversation(customer_id, acc_id=acc_id, limit=1)
|
||||||
return len(msgs) > 0
|
return len(msgs) > 0
|
||||||
except Exception:
|
except Exception:
|
||||||
return False
|
return False
|
||||||
|
|||||||
24
tests/test_intent_analyzer.py
Normal file
24
tests/test_intent_analyzer.py
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
import os
import unittest
from unittest import mock

from utils.intent_analyzer import detect_intent


class IntentAnalyzerTests(unittest.TestCase):
    """Exercise the keyword-fallback path of ``detect_intent``."""

    def setUp(self):
        # The assertions below pin ``source == "keyword"``, which only holds
        # when the embedding path yields nothing.  The analyzer reads
        # EMBEDDING_MODEL from the environment, so remove it for the duration
        # of each test and restore the caller's environment afterwards.
        env_patch = mock.patch.dict(os.environ)
        env_patch.start()
        self.addCleanup(env_patch.stop)
        os.environ.pop("EMBEDDING_MODEL", None)

    def test_keyword_fallback_for_price(self):
        """A pricing question is classified by the keyword rules."""
        d = detect_intent("这个怎么收费")
        self.assertEqual(d.intent, "询价")
        self.assertEqual(d.source, "keyword")

    def test_keyword_fallback_for_greeting(self):
        """A greeting is classified by the keyword rules."""
        d = detect_intent("你好 在吗")
        self.assertEqual(d.intent, "打招呼")
        self.assertEqual(d.source, "keyword")

    def test_unknown_intent(self):
        """Text matching no rule yields an empty intent and no source."""
        d = detect_intent("abc123")
        self.assertEqual(d.intent, "")
        self.assertIn(d.source, ("none", ""))


if __name__ == "__main__":
    unittest.main()
|
||||||
@@ -5,7 +5,8 @@
|
|||||||
"""
|
"""
|
||||||
import os
|
import os
|
||||||
import logging
|
import logging
|
||||||
from typing import Optional, Tuple
|
from dataclasses import dataclass
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -31,6 +32,13 @@ EMOTION_TEMPLATES = {
|
|||||||
|
|
||||||
_template_embeddings: dict = {}
|
_template_embeddings: dict = {}
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class IntentDecision:
    """Result of one intent-detection attempt.

    Attributes:
        intent: Detected intent label; empty string when nothing was detected.
        source: Which detector produced the label: ``"embedding"``,
            ``"keyword"``, or ``"none"`` when there is no label.
        score: Confidence reported by the detector; ``0.0`` when unavailable.
    """

    intent: str = ""
    source: str = "none"  # embedding / keyword / none
    score: float = 0.0
|
||||||
|
|
||||||
def _get_embedding(text: str, cache_key: str = None) -> Optional[list]:
|
def _get_embedding(text: str, cache_key: str = None) -> Optional[list]:
|
||||||
"""调用 embedding API,失败返回 None。cache_key 用于缓存模板向量"""
|
"""调用 embedding API,失败返回 None。cache_key 用于缓存模板向量"""
|
||||||
model = os.getenv("EMBEDDING_MODEL", "")
|
model = os.getenv("EMBEDDING_MODEL", "")
|
||||||
@@ -66,10 +74,16 @@ def _cosine_sim(a: list, b: list) -> float:
|
|||||||
|
|
||||||
|
|
||||||
def detect_intent_embedding(msg: str) -> Optional[str]:
|
def detect_intent_embedding(msg: str) -> Optional[str]:
|
||||||
"""用 embedding 检测意图,未配置或失败返回 None"""
|
"""用 embedding 检测意图,未配置或失败返回 None。"""
|
||||||
|
decision = detect_intent_embedding_decision(msg)
|
||||||
|
return decision.intent or None
|
||||||
|
|
||||||
|
|
||||||
|
def detect_intent_embedding_decision(msg: str) -> IntentDecision:
|
||||||
|
"""返回 embedding 意图决策(含分值)。"""
|
||||||
msg_emb = _get_embedding(msg)
|
msg_emb = _get_embedding(msg)
|
||||||
if not msg_emb:
|
if not msg_emb:
|
||||||
return None
|
return IntentDecision()
|
||||||
best_intent, best_score = "", 0.0
|
best_intent, best_score = "", 0.0
|
||||||
for intent, template in INTENT_TEMPLATES.items():
|
for intent, template in INTENT_TEMPLATES.items():
|
||||||
tpl_emb = _get_embedding(template, cache_key=f"intent_{intent}")
|
tpl_emb = _get_embedding(template, cache_key=f"intent_{intent}")
|
||||||
@@ -79,7 +93,9 @@ def detect_intent_embedding(msg: str) -> Optional[str]:
|
|||||||
if sim > best_score:
|
if sim > best_score:
|
||||||
best_score = sim
|
best_score = sim
|
||||||
best_intent = intent
|
best_intent = intent
|
||||||
return best_intent if best_score > 0.6 else None
|
if best_score > 0.6:
|
||||||
|
return IntentDecision(intent=best_intent, source="embedding", score=float(best_score))
|
||||||
|
return IntentDecision()
|
||||||
|
|
||||||
|
|
||||||
def detect_emotion_embedding(msg: str) -> Optional[str]:
|
def detect_emotion_embedding(msg: str) -> Optional[str]:
|
||||||
@@ -112,9 +128,32 @@ def detect_intent_keywords(msg: str) -> str:
|
|||||||
return "砍价"
|
return "砍价"
|
||||||
if any(k in m for k in ["改", "修改", "不满意"]):
|
if any(k in m for k in ["改", "修改", "不满意"]):
|
||||||
return "修改"
|
return "修改"
|
||||||
if any(k in m for k in ["多少钱", "价格", "报价", "多钱"]):
|
if any(k in m for k in ["多少钱", "价格", "报价", "多钱", "收费", "怎么收费", "咋收费"]):
|
||||||
return "询价"
|
return "询价"
|
||||||
if any(k in m for k in ["在吗", "你好", "有人"]):
|
if any(k in m for k in ["在吗", "你好", "有人"]):
|
||||||
return "打招呼"
|
return "打招呼"
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
|
|
||||||
|
def detect_intent(msg: str) -> IntentDecision:
    """Detect the intent of *msg*: embedding first, keyword rules as fallback.

    1. Ask ``detect_intent_embedding_decision`` for a decision and return it
       when it carries a non-empty intent.
    2. Otherwise (no intent, or the embedding call raised) fall back to
       ``detect_intent_keywords``.

    Args:
        msg: Raw customer message; ``None`` or blank input is tolerated.

    Returns:
        An ``IntentDecision``.  On a hit, ``source`` is ``"embedding"`` or
        ``"keyword"`` (keyword hits carry ``score=0.0``); otherwise a default
        decision with an empty ``intent`` is returned.
    """
    text = (msg or "").strip()
    if not text:
        return IntentDecision()

    try:
        emb_decision = detect_intent_embedding_decision(text)
    except Exception:
        # Best-effort: an embedding failure must never break detection, but
        # leave a trace instead of swallowing the error silently.
        logger.warning(
            "embedding intent detection failed; falling back to keywords",
            exc_info=True,
        )
        emb_decision = IntentDecision()
    if emb_decision.intent:
        return emb_decision

    kw_intent = detect_intent_keywords(text)
    if kw_intent:
        return IntentDecision(intent=kw_intent, source="keyword", score=0.0)
    return IntentDecision()
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user