feat: switch text risk filtering to AI-first with keyword fallback

2026-03-01 13:41:25 +08:00
parent 3c92611137
commit 1c266f2887
2 changed files with 126 additions and 5 deletions
--- a/core/pydantic_ai_agent.py
+++ b/core/pydantic_ai_agent.py
@@ -1687,16 +1687,21 @@ class CustomerServiceAgent:

        # 前置风控：客户文本一旦命中政治/敏感询问，直接拒绝，避免“发图我看看”类答非所问
        try:
-            from utils.content_filter import should_block_customer
-            map_hit = self._is_map_inquiry(message.msg)
-            political_hit = self._is_political_inquiry(message.msg)
-            if should_block_customer(message.msg) or political_hit or map_hit:
+            from utils.content_filter import should_block_customer_smart
+            risk_hit, risk_category, _risk_reason = await should_block_customer_smart(message.msg)
+            map_hit = self._is_map_inquiry(message.msg) or (risk_category == "map")
+            political_hit = self._is_political_inquiry(message.msg) or (risk_category == "political")
+            if risk_hit or political_hit or map_hit:
                # 命中敏感询问时清空待报价队列，避免旧图残留污染后续会话
                state.pending_image_urls.clear()
                state.pending_requirements.clear()
                self._sync_pending_quote_state(message.from_id, state)
                reject_text = "地图这类不做哈，这边不接地图相关需求。"
-                if political_hit and not map_hit:
+                if risk_category == "sexual":
+                    reject_text = "这类不做哈，涉黄擦边内容都不接。"
+                elif risk_category == "violent":
+                    reject_text = "这类不做哈，暴力血腥相关都不接。"
+                elif political_hit and not map_hit:
                    reject_text = "这类不做哈，政治相关图片和人物都不接。"
                reply = await self._rewrite_reply_with_ai(
                    message=message,
@@ -1711,6 +1716,7 @@ class CustomerServiceAgent:
                    customer_id=message.from_id,
                    map_hit=map_hit,
                    political_hit=political_hit,
+                    risk_category=risk_category,
                    reply=reply,
                )
                return AgentResponse(reply=reply, should_reply=True, need_transfer=False)
--- a/utils/content_filter.py
+++ b/utils/content_filter.py
@@ -7,6 +7,8 @@
 import os
 import re
 import base64
+import json
+import asyncio
 from typing import Tuple

 # 敏感词库（按类别，可扩展）
@@ -41,6 +43,8 @@ _SENSITIVE_PATTERNS = {
 }

 _COMPILED: dict = {}
+# Cache of AI risk verdicts keyed by the stripped customer text.
+# Value layout: (block, category, reason, ts), where ts is the event-loop
+# time at which the verdict was stored.
+_TEXT_RISK_CACHE: dict = {}  # text -> (block, category, reason, ts)
+# Maximum age, in seconds, for which a cached verdict is reused.
+_TEXT_RISK_CACHE_TTL = 300


 def _get_compiled():
@@ -89,6 +93,117 @@ def should_block_customer(text: str) -> bool:
    return len(hit) > 0


+def _risk_from_keyword(text: str) -> tuple[bool, str, str]:
+    """Keyword-based risk fallback.
+
+    Runs ``filter_sensitive`` on *text* and maps the matched keyword
+    categories onto a coarse risk label. When several categories match, the
+    first one in priority order (map, political, sexual, violent) wins.
+
+    Returns:
+        ``(block, category, reason)``; ``(False, "none", "")`` when nothing
+        matched, and category ``"other"`` for matches outside the table.
+    """
+    _unused, hits = filter_sensitive(text)
+    if not hits:
+        return False, "none", ""
+
+    matched = set(hits)
+    # Ordered from highest to lowest priority; runtime strings are kept as-is.
+    priority = (
+        ({"地图"}, "map", "命中地图关键词"),
+        ({"党政"}, "political", "命中政治关键词"),
+        ({"黄色", "擦边"}, "sexual", "命中涉黄关键词"),
+        ({"暴力", "血腥"}, "violent", "命中暴力关键词"),
+    )
+    for names, category, reason in priority:
+        if names & matched:
+            return True, category, reason
+    return True, "other", "命中敏感关键词"
+
+
+def _extract_json(text: str) -> dict:
+    """Parse a JSON object out of free-form model output.
+
+    The whole stripped text is tried first. If that does not yield a JSON
+    object, the text is scanned for the first ``{`` from which a complete
+    JSON object can be decoded, so replies wrapped in prose or code fences
+    (or followed by extra text containing braces) still parse.
+
+    Args:
+        text: Raw model reply; ``None`` or empty is treated as no content.
+
+    Returns:
+        The decoded object, or ``{}`` when no JSON object is found.
+        Non-object JSON (lists, strings, numbers, booleans) is never
+        returned, so callers can safely use ``.get`` on the result.
+    """
+    t = (text or "").strip()
+    if not t:
+        return {}
+    try:
+        parsed = json.loads(t)
+    except (ValueError, RecursionError):
+        parsed = None
+    if isinstance(parsed, dict):
+        return parsed
+
+    # Fall back to decoding the first complete object embedded in the text.
+    decoder = json.JSONDecoder()
+    start = t.find("{")
+    while start != -1:
+        try:
+            candidate, _end = decoder.raw_decode(t, start)
+        except (ValueError, RecursionError):
+            candidate = None
+        if isinstance(candidate, dict):
+            return candidate
+        start = t.find("{", start + 1)
+    return {}
+
+
+async def _detect_customer_risk_with_ai(text: str) -> tuple[bool, str, str]:
+    """Classify customer text with an OpenAI-compatible chat model.
+
+    Best-effort: configuration, API and parsing failures are logged and
+    reported as "not blocked" instead of being raised.
+
+    Reads ``OPENAI_API_KEY``, ``OPENAI_BASE_URL``, ``TEXT_RISK_MODEL`` /
+    ``OPENAI_MODEL`` and ``AI_TEXT_RISK_TIMEOUT_SECONDS`` from the
+    environment. Verdicts are cached in ``_TEXT_RISK_CACHE`` for
+    ``_TEXT_RISK_CACHE_TTL`` seconds, keyed by the stripped text.
+
+    Args:
+        text: Customer message; ``None``/blank returns "not blocked".
+
+    Returns:
+        ``(block, category, reason)`` where category is one of
+        map/political/sexual/violent/other/none. ``(False, "none", "")`` is
+        returned when the text is blank, no API key is set, or the call
+        fails.
+    """
+    import logging
+
+    raw = (text or "").strip()
+    if not raw:
+        return False, "none", ""
+    api_key = os.getenv("OPENAI_API_KEY", "").strip()
+    if not api_key:
+        return False, "none", ""
+
+    try:
+        now = asyncio.get_running_loop().time()
+    except RuntimeError:
+        now = 0.0
+    cached = _TEXT_RISK_CACHE.get(raw)
+    if cached:
+        if (now - cached[3]) < _TEXT_RISK_CACHE_TTL:
+            return cached[0], cached[1], cached[2]
+        # Drop the stale entry so expired verdicts do not accumulate.
+        _TEXT_RISK_CACHE.pop(raw, None)
+
+    base_url = os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1").strip()
+    model = os.getenv("TEXT_RISK_MODEL", "").strip() or os.getenv("OPENAI_MODEL", "gpt-4o-mini")
+    try:
+        timeout_s = float(os.getenv("AI_TEXT_RISK_TIMEOUT_SECONDS", "4"))
+    except ValueError:
+        # A malformed setting must not break the best-effort contract.
+        timeout_s = 4.0
+
+    prompt = (
+        "你是电商客服文本风控分类器。请判断用户文本是否属于禁止接单内容。\n"
+        "禁止接单类别：\n"
+        "1) map: 地图相关（地图、行政区划图、地形图、卫星地图、导航图等）\n"
+        "2) political: 政治/党政/领导人/政治事件\n"
+        "3) sexual: 色情/擦边/裸露/性暗示\n"
+        "4) violent: 暴力/血腥\n"
+        "若不属于以上类别，category=none 且 block=false。\n"
+        "只输出 JSON，不要解释。\n"
+        "格式：{\"block\":true/false,\"category\":\"map|political|sexual|violent|other|none\",\"reason\":\"简短原因\"}\n"
+        f"用户文本：{raw}"
+    )
+
+    try:
+        from openai import AsyncOpenAI
+        # The context manager closes the client's HTTP resources after the call.
+        async with AsyncOpenAI(api_key=api_key, base_url=base_url, timeout=timeout_s) as client:
+            resp = await client.chat.completions.create(
+                model=model,
+                temperature=0,
+                messages=[
+                    {"role": "system", "content": "你是严格 JSON 输出的风控分类器。"},
+                    {"role": "user", "content": prompt},
+                ],
+            )
+        content = (resp.choices[0].message.content or "").strip()
+        data = _extract_json(content)
+        if not isinstance(data, dict):
+            data = {}
+        flag = data.get("block", False)
+        if isinstance(flag, str):
+            # bool("false") is True, so string flags are interpreted explicitly.
+            block = flag.strip().lower() in {"true", "1", "yes"}
+        else:
+            block = bool(flag)
+        category = str(data.get("category", "none") or "none").strip().lower()
+        reason = str(data.get("reason", "") or "").strip()
+        if category not in {"map", "political", "sexual", "violent", "other", "none"}:
+            category = "none"
+            block = False
+        if category == "none":
+            block = False
+
+        # Keep the cache bounded: purge expired entries first, then reset if
+        # it is still full.
+        max_entries = 2048
+        if len(_TEXT_RISK_CACHE) >= max_entries:
+            expired = [
+                key for key, entry in _TEXT_RISK_CACHE.items()
+                if (now - entry[3]) >= _TEXT_RISK_CACHE_TTL
+            ]
+            for key in expired:
+                del _TEXT_RISK_CACHE[key]
+            if len(_TEXT_RISK_CACHE) >= max_entries:
+                _TEXT_RISK_CACHE.clear()
+        _TEXT_RISK_CACHE[raw] = (block, category, reason, now)
+        return block, category, reason
+    except Exception:
+        # Deliberately best-effort, but leave a trace instead of failing silently.
+        logging.getLogger(__name__).warning("AI text risk check failed", exc_info=True)
+        return False, "none", ""
+
+
+async def should_block_customer_smart(text: str) -> tuple[bool, str, str]:
+    """Entry point for customer-text risk checks: AI first, keywords second.
+
+    When ``AI_TEXT_RISK_ENABLED`` (default ``"true"``) is one of
+    1/true/yes/on, the AI classifier is consulted and its verdict is
+    returned if it blocks. In every other case the result of the keyword
+    check is returned.
+
+    Returns:
+        ``(block, category, reason)``.
+    """
+    flag = os.getenv("AI_TEXT_RISK_ENABLED", "true").strip().lower()
+    if flag not in {"1", "true", "yes", "on"}:
+        return _risk_from_keyword(text)
+
+    blocked, category, why = await _detect_customer_risk_with_ai(text)
+    if not blocked:
+        # AI did not block (or was unavailable): defer to the keyword check.
+        return _risk_from_keyword(text)
+    return True, category, why or "AI 判定敏感"
+
+
 _SENSITIVE_IMAGE_PROMPT = """请判断这张图片是否包含以下任何敏感内容：
 - 暴力（打斗、武器、伤害）
 - 血腥（伤口、血迹、残肢等）