From 00c80c3beca77769ad7f2904b00592844538acb5 Mon Sep 17 00:00:00 2001 From: jimi <1847930177@qq.com> Date: Sun, 1 Mar 2026 17:09:05 +0800 Subject: [PATCH] feat: ai-first intent detection with keyword fallback --- core/collection_intent_helpers.py | 5 ++- core/context_helpers.py | 9 +++--- core/websocket_client.py | 54 ++++++++++++++++++++++++------- tests/test_intent_analyzer.py | 24 ++++++++++++++ utils/intent_analyzer.py | 49 +++++++++++++++++++++++++--- 5 files changed, 117 insertions(+), 24 deletions(-) create mode 100644 tests/test_intent_analyzer.py diff --git a/core/collection_intent_helpers.py b/core/collection_intent_helpers.py index a450703..0bd1888 100644 --- a/core/collection_intent_helpers.py +++ b/core/collection_intent_helpers.py @@ -148,9 +148,8 @@ def is_batch_finish_intent(text: str, state: Any, has_incoming_urls: bool) -> bo return False try: - from utils.intent_analyzer import detect_intent_embedding, detect_intent_keywords - - intent = detect_intent_embedding(text) or detect_intent_keywords(text) + from utils.intent_analyzer import detect_intent + intent = detect_intent(text).intent except Exception: intent = "" if intent in ("询价", "砍价"): diff --git a/core/context_helpers.py b/core/context_helpers.py index 3a2d53b..c42767d 100644 --- a/core/context_helpers.py +++ b/core/context_helpers.py @@ -210,15 +210,16 @@ def get_conversation_context(customer_id: str, acc_id: str = "", limit: int = 12 def get_intent_emotion_hint(msg: str) -> str: """语义匹配:意图/情绪识别,注入提示。EMBEDDING_MODEL 未配置时用关键词。""" try: - from utils.intent_analyzer import detect_emotion_embedding, detect_intent_embedding, detect_intent_keywords + from utils.intent_analyzer import detect_emotion_embedding, detect_intent - intent = detect_intent_embedding(msg) - if not intent: - intent = detect_intent_keywords(msg) + decision = detect_intent(msg) + intent = decision.intent emotion = detect_emotion_embedding(msg) if os.getenv("EMBEDDING_MODEL") else None parts = [] if intent: parts.append(f"意图:{intent}") + if decision.source: + parts.append(f"意图来源:{decision.source}") if emotion: parts.append(f"情绪:{emotion}") if parts: diff --git a/core/websocket_client.py b/core/websocket_client.py index a822228..8dd7a85 100755 --- a/core/websocket_client.py +++ b/core/websocket_client.py @@ -396,7 +396,14 @@ class QingjianAPIClient: elif self._is_shop_card(data): # 进店卡片:有历史对话就不回复,没有才打招呼(Gemini 已在上面统一跳过) cid = data.get('from_id', '') - if self._has_chat_history(cid): + acc_id = data.get('acc_id', '') + residual_text = self._extract_customer_text_from_shop_card_msg(data.get('msg', '')) + if residual_text: + logger.info(f"[{self.get_time()}] 进店卡片携带客户文本,转普通消息处理: {residual_text}") + patched = dict(data) + patched['msg'] = residual_text + await self._debounce_agent_reply(patched) + elif self._has_chat_history(cid, acc_id=acc_id): logger.info(f"[{self.get_time()}] 进店卡片(已有记录),跳过") else: logger.info(f"[{self.get_time()}] 进店卡片(新客户),发送问候") @@ -505,14 +512,23 @@ class QingjianAPIClient: if self._msg_has_image_url(text): return "image" try: - from utils.intent_analyzer import detect_intent_keywords - intent = detect_intent_keywords(text) + from utils.intent_analyzer import detect_intent + decision = detect_intent(text) + intent = decision.intent + if intent: + self._activity_log( + "debounce_intent_detected", + intent=intent, + source=decision.source, + score=round(float(decision.score or 0.0), 4), + msg=text[:120], + ) except Exception: intent = "" if intent: return intent lower = text.lower() - if any(k in lower for k in ["报价", "多少钱", "价格", "贵", "优惠"]): + if any(k in lower for k in ["报价", "多少钱", "价格", "贵", "优惠", "收费", "怎么收费", "咋收费"]): return "询价" if any(k in lower for k in ["做一下", "改一下", "需求", "门头", "上面的字", "处理"]): return "修改" @@ -1222,17 +1238,31 @@ class QingjianAPIClient: msg = self.to_chinese(data.get('msg', '')) return msg.startswith('[进店卡片]') or '我想咨询你们店的这个商品' in msg - def _has_chat_history(self, customer_id: str) -> bool: - """判断该客户是否已有聊天记录(内存历史或数据库均可)""" + def _extract_customer_text_from_shop_card_msg(self, msg: str) -> str: + """从“进店卡片+文本”混合消息里提取客户真实文本。""" + text = self.to_chinese(msg or "").strip() + if not text: + return "" + parts = [p.strip() for p in text.split("#*#") if p and p.strip()] + kept = [] + for part in parts: + if part.startswith("[进店卡片]") or "我想咨询你们店的这个商品" in part: + continue + kept.append(part) + if kept: + return " ".join(kept).strip() + stripped = re.sub(r"\[进店卡片\][^\n\r]*", "", text).strip() + stripped = stripped.replace("我想咨询你们店的这个商品", "").strip(",。,#* ") + return stripped + + def _has_chat_history(self, customer_id: str, acc_id: str = "") -> bool: + """判断该客户在当前店铺是否已有聊天记录。""" if not customer_id: return False - # 先查内存对话历史(最快) - if customer_id in self.agent.message_histories and self.agent.message_histories[customer_id]: - return True - # 再查数据库(重启后仍有记录) + # 按店铺+客户查数据库,避免跨店串历史导致错误跳过。 try: - from db.chat_log_db import get_conversation - msgs = get_conversation(customer_id, limit=1) + from db.chat_log_db import get_recent_conversation + msgs = get_recent_conversation(customer_id, acc_id=acc_id, limit=1) return len(msgs) > 0 except Exception: return False diff --git a/tests/test_intent_analyzer.py b/tests/test_intent_analyzer.py new file mode 100644 index 0000000..582fe70 --- /dev/null +++ b/tests/test_intent_analyzer.py @@ -0,0 +1,24 @@ +import unittest + +from utils.intent_analyzer import detect_intent + + +class IntentAnalyzerTests(unittest.TestCase): + def test_keyword_fallback_for_price(self): + d = detect_intent("这个怎么收费") + self.assertEqual(d.intent, "询价") + self.assertEqual(d.source, "keyword") + + def test_keyword_fallback_for_greeting(self): + d = detect_intent("你好 在吗") + self.assertEqual(d.intent, "打招呼") + self.assertEqual(d.source, "keyword") + + def test_unknown_intent(self): + d = detect_intent("abc123") + self.assertEqual(d.intent, "") + self.assertIn(d.source, ("none", "")) + + +if __name__ == "__main__": + unittest.main() diff --git a/utils/intent_analyzer.py b/utils/intent_analyzer.py index a2f2513..70fc499 100755 --- a/utils/intent_analyzer.py +++ b/utils/intent_analyzer.py @@ -5,7 +5,8 @@ """ import os import logging -from typing import Optional, Tuple +from dataclasses import dataclass +from typing import Optional logger = logging.getLogger(__name__) @@ -31,6 +32,13 @@ EMOTION_TEMPLATES = { _template_embeddings: dict = {} + +@dataclass +class IntentDecision: + intent: str = "" + source: str = "none" # embedding / keyword / none + score: float = 0.0 + def _get_embedding(text: str, cache_key: str = None) -> Optional[list]: """调用 embedding API,失败返回 None。cache_key 用于缓存模板向量""" model = os.getenv("EMBEDDING_MODEL", "") @@ -66,10 +74,16 @@ def _cosine_sim(a: list, b: list) -> float: def detect_intent_embedding(msg: str) -> Optional[str]: - """用 embedding 检测意图,未配置或失败返回 None""" + """用 embedding 检测意图,未配置或失败返回 None。""" + decision = detect_intent_embedding_decision(msg) + return decision.intent or None + + +def detect_intent_embedding_decision(msg: str) -> IntentDecision: + """返回 embedding 意图决策(含分值)。""" msg_emb = _get_embedding(msg) if not msg_emb: - return None + return IntentDecision() best_intent, best_score = "", 0.0 for intent, template in INTENT_TEMPLATES.items(): tpl_emb = _get_embedding(template, cache_key=f"intent_{intent}") @@ -79,7 +93,9 @@ def detect_intent_embedding(msg: str) -> Optional[str]: if sim > best_score: best_score = sim best_intent = intent - return best_intent if best_score > 0.6 else None + if best_score > 0.6: + return IntentDecision(intent=best_intent, source="embedding", score=float(best_score)) + return IntentDecision() def detect_emotion_embedding(msg: str) -> Optional[str]: @@ -112,9 +128,32 @@ def detect_intent_keywords(msg: str) -> str: return "砍价" if any(k in m for k in ["改", "修改", "不满意"]): return "修改" - if any(k in m for k in ["多少钱", "价格", "报价", "多钱"]): + if any(k in m for k in ["多少钱", "价格", "报价", "多钱", "收费", "怎么收费", "咋收费"]): return "询价" if any(k in m for k in ["在吗", "你好", "有人"]): return "打招呼" return "" + +def detect_intent(msg: str) -> IntentDecision: + """ + AI 意图判定 + 规则兜底: + 1) 有 embedding 配置时先走 embedding。 + 2) 失败/低置信时回退关键词规则。 + """ + text = (msg or "").strip() + if not text: + return IntentDecision() + + try: + emb_decision = detect_intent_embedding_decision(text) + except Exception: + emb_decision = IntentDecision() + if emb_decision.intent: + return emb_decision + + kw_intent = detect_intent_keywords(text) + if kw_intent: + return IntentDecision(intent=kw_intent, source="keyword", score=0.0) + return IntentDecision() +