From 1c266f2887ffa3996d067bbe606b4682642572a9 Mon Sep 17 00:00:00 2001 From: jimi <1847930177@qq.com> Date: Sun, 1 Mar 2026 13:41:25 +0800 Subject: [PATCH] feat: switch text risk filtering to AI-first with keyword fallback --- core/pydantic_ai_agent.py | 16 ++++-- utils/content_filter.py | 115 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 126 insertions(+), 5 deletions(-) diff --git a/core/pydantic_ai_agent.py b/core/pydantic_ai_agent.py index ca72e39..41ac84e 100755 --- a/core/pydantic_ai_agent.py +++ b/core/pydantic_ai_agent.py @@ -1687,16 +1687,21 @@ class CustomerServiceAgent: # 前置风控:客户文本一旦命中政治/敏感询问,直接拒绝,避免“发图我看看”类答非所问 try: - from utils.content_filter import should_block_customer - map_hit = self._is_map_inquiry(message.msg) - political_hit = self._is_political_inquiry(message.msg) - if should_block_customer(message.msg) or political_hit or map_hit: + from utils.content_filter import should_block_customer_smart + risk_hit, risk_category, _risk_reason = await should_block_customer_smart(message.msg) + map_hit = self._is_map_inquiry(message.msg) or (risk_category == "map") + political_hit = self._is_political_inquiry(message.msg) or (risk_category == "political") + if risk_hit or political_hit or map_hit: # 命中敏感询问时清空待报价队列,避免旧图残留污染后续会话 state.pending_image_urls.clear() state.pending_requirements.clear() self._sync_pending_quote_state(message.from_id, state) reject_text = "地图这类不做哈,这边不接地图相关需求。" - if political_hit and not map_hit: + if risk_category == "sexual": + reject_text = "这类不做哈,涉黄擦边内容都不接。" + elif risk_category == "violent": + reject_text = "这类不做哈,暴力血腥相关都不接。" + elif political_hit and not map_hit: reject_text = "这类不做哈,政治相关图片和人物都不接。" reply = await self._rewrite_reply_with_ai( message=message, @@ -1711,6 +1716,7 @@ class CustomerServiceAgent: customer_id=message.from_id, map_hit=map_hit, political_hit=political_hit, + risk_category=risk_category, reply=reply, ) return AgentResponse(reply=reply, should_reply=True, need_transfer=False) diff --git 
# utils/content_filter.py -- customer text risk filtering (AI first, keyword fallback).
#
# Rebuilt as plain Python from whitespace-collapsed diff hunks. Definitions that the
# hunks show only in part (_SENSITIVE_PATTERNS, _get_compiled, should_block_customer,
# _SENSITIVE_IMAGE_PROMPT) and filter_sensitive, which is called below but not shown,
# are not reproduced here.
import asyncio
import base64
import json
import logging
import os
import re
import time
from typing import Tuple

_risk_logger = logging.getLogger(__name__)

_COMPILED: dict = {}
_TEXT_RISK_CACHE: dict = {}  # text -> (block, category, reason, ts)
_TEXT_RISK_CACHE_TTL = 300  # seconds an AI verdict stays valid
_TEXT_RISK_CACHE_MAX = 1024  # hard cap on cached texts so the dict cannot grow forever
_RISK_CATEGORIES = frozenset({"map", "political", "sexual", "violent", "other", "none"})


def _risk_from_keyword(text: str) -> tuple[bool, str, str]:
    """Keyword-based fallback check.

    Returns:
        ``(block, category, reason)``. ``category`` is one of
        map/political/sexual/violent/other, or ``"none"`` when no keyword hit.
    """
    _, hit = filter_sensitive(text)
    if not hit:
        return False, "none", ""
    cats = set(hit)
    # Order matters: the first matching group decides the reported category.
    if "地图" in cats:
        return True, "map", "命中地图关键词"
    if "党政" in cats:
        return True, "political", "命中政治关键词"
    if {"黄色", "擦边"} & cats:
        return True, "sexual", "命中涉黄关键词"
    if {"暴力", "血腥"} & cats:
        return True, "violent", "命中暴力关键词"
    return True, "other", "命中敏感关键词"


def _extract_json(text: str) -> dict:
    """Parse a JSON object out of a model reply.

    The whole text is tried first; if that is not a JSON object, the widest
    ``{...}`` span inside it is tried (covers replies wrapped in prose or code
    fences). Anything that does not decode to a ``dict`` yields ``{}`` so
    callers can always use ``.get``.
    """
    t = (text or "").strip()
    if not t:
        return {}
    candidates = [t]
    m = re.search(r"\{[\s\S]*\}", t)
    if m and m.group(0) != t:
        candidates.append(m.group(0))
    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except (ValueError, RecursionError):
            continue
        # Valid JSON that is a list/string/number is not a usable verdict.
        if isinstance(parsed, dict):
            return parsed
    return {}


def _coerce_block_flag(value) -> bool:
    """Interpret the model's ``block`` field; the string "false" must not count as True."""
    if isinstance(value, str):
        return value.strip().lower() in {"true", "1", "yes", "y"}
    return bool(value)


def _risk_timeout_seconds(default: float = 4.0) -> float:
    """Read AI_TEXT_RISK_TIMEOUT_SECONDS, falling back to ``default`` on bad values."""
    raw = os.getenv("AI_TEXT_RISK_TIMEOUT_SECONDS", "").strip()
    if not raw:
        return default
    try:
        value = float(raw)
    except ValueError:
        return default
    # Rejects zero, negatives, NaN and infinity.
    return value if 0 < value < float("inf") else default


def _cache_store(key: str, result: tuple, now: float) -> None:
    """Insert ``result`` (block, category, reason) into the cache, keeping it bounded."""
    # Re-inserting moves the key to the end so insertion order tracks freshness.
    _TEXT_RISK_CACHE.pop(key, None)
    if len(_TEXT_RISK_CACHE) >= _TEXT_RISK_CACHE_MAX:
        stale = [k for k, v in _TEXT_RISK_CACHE.items() if now - v[3] >= _TEXT_RISK_CACHE_TTL]
        for k in stale:
            del _TEXT_RISK_CACHE[k]
    # Still full after dropping expired entries: evict the oldest insertions.
    while _TEXT_RISK_CACHE and len(_TEXT_RISK_CACHE) >= _TEXT_RISK_CACHE_MAX:
        del _TEXT_RISK_CACHE[next(iter(_TEXT_RISK_CACHE))]
    _TEXT_RISK_CACHE[key] = (result[0], result[1], result[2], now)


async def _detect_customer_risk_with_ai(text: str) -> tuple[bool, str, str]:
    """AI text risk check.

    Returns:
        ``(block, category, reason)`` where category is one of
        map/political/sexual/violent/other/none. Returns ``(False, "none", "")``
        when the text is empty, OPENAI_API_KEY is not set, or the request or
        parsing fails; this function never raises for those cases.
    """
    raw = (text or "").strip()
    if not raw:
        return False, "none", ""
    api_key = os.getenv("OPENAI_API_KEY", "").strip()
    if not api_key:
        return False, "none", ""

    now = time.monotonic()
    cached = _TEXT_RISK_CACHE.get(raw)
    if cached and (now - cached[3]) < _TEXT_RISK_CACHE_TTL:
        return cached[0], cached[1], cached[2]

    # `or` guards against the variable being set but empty.
    base_url = os.getenv("OPENAI_BASE_URL", "").strip() or "https://api.openai.com/v1"
    model = os.getenv("TEXT_RISK_MODEL", "").strip() or os.getenv("OPENAI_MODEL", "gpt-4o-mini")
    timeout_s = _risk_timeout_seconds()

    # NOTE(review): customer text is interpolated into the prompt verbatim, so a
    # crafted message can steer the model's answer. A non-block verdict from here
    # is not final: should_block_customer_smart still runs the keyword filter.
    prompt = (
        "你是电商客服文本风控分类器。请判断用户文本是否属于禁止接单内容。\n"
        "禁止接单类别:\n"
        "1) map: 地图相关(地图、行政区划图、地形图、卫星地图、导航图等)\n"
        "2) political: 政治/党政/领导人/政治事件\n"
        "3) sexual: 色情/擦边/裸露/性暗示\n"
        "4) violent: 暴力/血腥\n"
        "若不属于以上类别,category=none 且 block=false。\n"
        "只输出 JSON,不要解释。\n"
        "格式:{\"block\":true/false,\"category\":\"map|political|sexual|violent|other|none\",\"reason\":\"简短原因\"}\n"
        f"用户文本:{raw}"
    )

    try:
        from openai import AsyncOpenAI

        # Context manager closes the client's HTTP connections when the call is done.
        async with AsyncOpenAI(api_key=api_key, base_url=base_url, timeout=timeout_s) as client:
            resp = await client.chat.completions.create(
                model=model,
                temperature=0,
                messages=[
                    {"role": "system", "content": "你是严格 JSON 输出的风控分类器。"},
                    {"role": "user", "content": prompt},
                ],
            )
        content = (resp.choices[0].message.content or "").strip()
        data = _extract_json(content)
        if not data:
            # Unparseable reply: do not cache it as a "safe" verdict.
            return False, "none", ""
        category = str(data.get("category", "none") or "none").strip().lower()
        reason = str(data.get("reason", "") or "").strip()
        if category not in _RISK_CATEGORIES:
            category = "none"
        # A block verdict is only honoured together with a real category.
        block = category != "none" and _coerce_block_flag(data.get("block", False))
        _cache_store(raw, (block, category, reason), now)
        return block, category, reason
    except Exception:
        # Best-effort by design: any failure here defers to the keyword filter.
        _risk_logger.warning("AI text risk check failed; deferring to keyword filter", exc_info=True)
        return False, "none", ""


async def should_block_customer_smart(text: str) -> tuple[bool, str, str]:
    """Entry point for customer text risk control: AI first, keywords as fallback.

    The AI check runs unless AI_TEXT_RISK_ENABLED is set to something other than
    1/true/yes/on. If it does not block (or is disabled), the keyword filter decides.

    Returns:
        ``(block, category, reason)``.
    """
    if not (text or "").strip():
        # Nothing to classify; also keeps None away from the keyword filter.
        return False, "none", ""
    ai_enabled = os.getenv("AI_TEXT_RISK_ENABLED", "true").strip().lower() in {"1", "true", "yes", "on"}
    if ai_enabled:
        block, cat, reason = await _detect_customer_risk_with_ai(text)
        if block:
            return True, cat, reason or "AI 判定敏感"
    return _risk_from_keyword(text)