# -*- coding: utf-8 -*-
"""Semantic matching: intent / emotion detection via embeddings.

Embedding-based detection is active only when the ``EMBEDDING_MODEL``
environment variable is set. When it is unset (or the embedding call fails)
the ``detect_*_embedding`` functions return ``None`` so that callers can fall
back to ``detect_intent_keywords``.
"""
import functools
import logging
import os
from typing import Optional, Tuple

logger = logging.getLogger(__name__)

# Intent templates: one representative utterance per intent, compared against
# the incoming message by embedding cosine similarity.
INTENT_TEMPLATES = {
    "询价": "我想问一下价格多少钱",
    "发图": "我发图给你看看",
    "砍价": "能不能便宜点太贵了",
    "批量": "我要做很多张图批量",
    "加急": "能不能快点很急",
    "售后": "已经付款了什么时候好",
    "修改": "不满意要改一下",
    "转接": "我要退款投诉",
    "打招呼": "你好在吗有人吗",
}

# Emotion templates, used the same way as INTENT_TEMPLATES.
EMOTION_TEMPLATES = {
    "平静": "好的谢谢",
    "着急": "快点啊很急",
    "不满": "怎么这么慢不满意",
    "砍价": "太贵了便宜点",
}

# Cache of template embeddings, keyed by the ``cache_key`` passed to
# ``_get_embedding`` (e.g. "intent_询价").
_template_embeddings: dict = {}

# Maximum number of characters of the input text sent to the embedding API.
_MAX_INPUT_CHARS = 2000

# A match is accepted only when its best similarity is strictly above these.
_INTENT_THRESHOLD = 0.6
_EMOTION_THRESHOLD = 0.55

# Keyword fallback rules, checked in order; the first rule with any keyword
# contained in the message wins, so earlier entries take priority.
_KEYWORD_RULES: Tuple[Tuple[str, Tuple[str, ...]], ...] = (
    ("转接", ("退款", "退货", "投诉")),
    ("批量", ("多张", "批量", "很多", "几十张")),
    ("加急", ("快点", "加急", "很急", "着急")),
    ("砍价", ("便宜", "贵", "少点", "打折")),
    ("修改", ("改", "修改", "不满意")),
    ("询价", ("多少钱", "价格", "报价", "多钱")),
    ("打招呼", ("在吗", "你好", "有人")),
)


@functools.lru_cache(maxsize=4)
def _get_client(api_key: Optional[str], base_url: Optional[str]):
    """Return an OpenAI client for the given credentials, reusing prior ones.

    The client is memoized per ``(api_key, base_url)`` pair so repeated
    embedding calls share one client instead of building a new one each time.
    ``openai`` is imported lazily so this module still imports when the
    package is not installed; the resulting ``ImportError`` propagates to the
    caller.
    """
    from openai import OpenAI

    return OpenAI(api_key=api_key, base_url=base_url)


def _get_embedding(text: str, cache_key: Optional[str] = None) -> Optional[list]:
    """Return the embedding vector for *text*, or ``None`` when unavailable.

    Args:
        text: Text to embed; only the first ``_MAX_INPUT_CHARS`` characters
            are sent to the API.
        cache_key: When given, the result is stored in (and later served
            from) ``_template_embeddings`` under this key. Used for the
            fixed template sentences.

    Returns:
        The embedding as returned by the API, or ``None`` if
        ``EMBEDDING_MODEL`` is not set, *text* is empty, or the call fails
        for any reason (including ``openai`` not being installed).
    """
    model = os.getenv("EMBEDDING_MODEL", "")
    if not model:
        return None
    if cache_key and cache_key in _template_embeddings:
        return _template_embeddings[cache_key]
    if not text:
        # Nothing to embed; skip the client setup and the network round trip.
        return None
    try:
        client = _get_client(
            os.getenv("OPENAI_API_KEY"),
            os.getenv("OPENAI_BASE_URL"),
        )
        resp = client.embeddings.create(model=model, input=text[:_MAX_INPUT_CHARS])
        emb = resp.data[0].embedding
    except Exception as e:
        # Best effort by design: any failure degrades to "no embedding" so the
        # caller can fall back, but it is logged so misconfiguration is visible.
        logger.warning("embedding 失败: %s", e)
        return None
    if cache_key:
        _template_embeddings[cache_key] = emb
    return emb


def _cosine_sim(a: list, b: list) -> float:
    """Return the cosine similarity of vectors *a* and *b*.

    Returns ``0.0`` when either vector is empty, the lengths differ, or
    either vector has zero norm.
    """
    if not a or not b or len(a) != len(b):
        return 0.0
    dot = sum(x * y for x, y in zip(a, b))
    na = sum(x * x for x in a) ** 0.5
    nb = sum(y * y for y in b) ** 0.5
    if na == 0 or nb == 0:
        return 0.0
    return dot / (na * nb)


def _best_match(
    msg: str, templates: dict, key_prefix: str, threshold: float
) -> Optional[str]:
    """Return the label in *templates* whose template is most similar to *msg*.

    Args:
        msg: Incoming message to classify.
        templates: Mapping of label -> representative template sentence.
        key_prefix: Prefix for the template cache keys
            (``f"{key_prefix}_{label}"``).
        threshold: The best similarity must be strictly greater than this.

    Returns:
        The best-scoring label, or ``None`` when the message embedding is
        unavailable or no template scores above *threshold*.
    """
    msg_emb = _get_embedding(msg)
    if not msg_emb:
        return None
    best_label, best_score = "", 0.0
    for label, template in templates.items():
        tpl_emb = _get_embedding(template, cache_key=f"{key_prefix}_{label}")
        if not tpl_emb:
            # Template embedding unavailable; skip it rather than fail.
            continue
        sim = _cosine_sim(msg_emb, tpl_emb)
        if sim > best_score:
            best_label, best_score = label, sim
    return best_label if best_score > threshold else None


def detect_intent_embedding(msg: str) -> Optional[str]:
    """Detect the intent of *msg* via embeddings.

    Returns a key of ``INTENT_TEMPLATES``, or ``None`` when embeddings are
    not configured, the call fails, or no intent is similar enough.
    """
    return _best_match(msg, INTENT_TEMPLATES, "intent", _INTENT_THRESHOLD)


def detect_emotion_embedding(msg: str) -> Optional[str]:
    """Detect the emotion of *msg* via embeddings.

    Returns a key of ``EMOTION_TEMPLATES``, or ``None`` when embeddings are
    not configured, the call fails, or no emotion is similar enough.
    """
    return _best_match(msg, EMOTION_TEMPLATES, "emotion", _EMOTION_THRESHOLD)


def detect_intent_keywords(msg: str) -> str:
    """Keyword fallback for intent detection when embeddings are unavailable.

    The message is stripped and lower-cased, then checked against
    ``_KEYWORD_RULES`` in order.

    Returns:
        The intent of the first rule with a keyword contained in the
        message, or ``""`` when nothing matches (including empty / ``None``
        input).
    """
    text = (msg or "").strip().lower()
    for intent, keywords in _KEYWORD_RULES:
        if any(k in text for k in keywords):
            return intent
    return ""