fix: harden outbound leak guard and title naming

This commit is contained in:
2026-03-09 14:34:04 +08:00
parent d3b55798e5
commit a2119f3b6d
3 changed files with 104 additions and 5 deletions

View File

@@ -1,5 +1,6 @@
import logging import logging
import asyncio import asyncio
import re
from typing import Optional, List, Any from typing import Optional, List, Any
from datetime import datetime from datetime import datetime
from db.customer_db import db as customer_db from db.customer_db import db as customer_db
@@ -8,6 +9,43 @@ from db.chat_log_db import log_message, get_conversation
logger = logging.getLogger("cs_agent")

# Substrings that mark internal-only content (summary/detail section headers,
# model reasoning tags, raw tool-call JSON). Any outbound text containing one
# of them is replaced with the fallback reply.
_OUTBOUND_BLOCK_MARKERS = (
    "【历史记录摘要】",
    "【详细记录】",
    "【订单摘要】",
    "【订单详情】",
    "<think",
    "think_never_used",
    '[{"name":',
)

# Regex sources describing text that looks like a leaked chat transcript or
# order dump. The colon class accepts both the ASCII colon and the full-width
# colon (U+FF1A) so that Chinese-punctuated text cannot slip past the guard.
_HISTORY_LEAK_PATTERNS = [
    r'\[\d{4}-\d{2}-\d{2}[^\]]*\]\s*(客户|客服)[:\uff1a]',
    r'\[\d{2}:\d{2}:\d{2}\]\s*(客户|客服|我)[:\uff1a]',
    r'(根据|查看|查询|翻看)(历史|聊天|对话)(记录|内容)',
    r'历史(记录|对话|消息)(显示|表明|中)',
    r'之前的(聊天|对话|记录)(中|里|显示)',
    r'\d+条(历史|对话)?消息',
    r'订单号[:\uff1a]\s*\d{10,}',
    r'(状态|金额|数量)[:\uff1a].*(状态|金额|数量)[:\uff1a]',
]

# Compiled once at import time; the sanitizer runs for every outbound message.
_HISTORY_LEAK_REGEXES = tuple(re.compile(src) for src in _HISTORY_LEAK_PATTERNS)

# Neutral reply stored in place of any blocked content.
_OUTBOUND_FALLBACK_REPLY = "我在帮你看记录,稍等哈"


def _sanitize_outbound_archive_text(content: str) -> str:
    """Return a version of *content* that is safe to archive as an outbound message.

    Args:
        content: Outbound text about to be persisted. Falsy values
            (``None``, ``""``) are accepted.

    Returns:
        ``""`` for falsy input; the stripped text unchanged when it contains
        the ``[转移会话]`` marker or triggers no rule; otherwise the fixed
        fallback reply.
    """
    if not content:
        return ""
    cleaned = str(content).strip()
    # Messages carrying the session-transfer marker bypass all checks.
    if "[转移会话]" in cleaned:
        return cleaned
    if any(marker in cleaned for marker in _OUTBOUND_BLOCK_MARKERS):
        logger.warning("[Repository] 拦截到内部内容写入外发记录,替换为安全兜底回复")
        return _OUTBOUND_FALLBACK_REPLY
    for regex in _HISTORY_LEAK_REGEXES:
        if regex.search(cleaned):
            # Only the first 30 characters of the pattern source are logged.
            logger.warning(
                "[Repository] 检测到历史记录泄露模式,拦截出站入库: %s...",
                regex.pattern[:30],
            )
            return _OUTBOUND_FALLBACK_REPLY
    return cleaned
class DataRepository: class DataRepository:
""" """
异步数据仓库:使用 asyncio.to_thread 屏蔽底层同步 IO 阻塞。 异步数据仓库:使用 asyncio.to_thread 屏蔽底层同步 IO 阻塞。
@@ -29,6 +67,8 @@ class DataRepository:
msg_type: int = 0, msg_type: int = 0,
): ):
"""异步持久化存储聊天记录""" """异步持久化存储聊天记录"""
if direction == "out" and int(msg_type or 0) == 0:
content = _sanitize_outbound_archive_text(content)
# 将图片URL列表转为\n分隔的字符串 # 将图片URL列表转为\n分隔的字符串
urls_str = "\n".join(image_urls) if image_urls else "" urls_str = "\n".join(image_urls) if image_urls else ""
return await asyncio.to_thread( return await asyncio.to_thread(

View File

@@ -1,13 +1,54 @@
import json import json
import logging
import re
import websockets import websockets
logger = logging.getLogger("cs_agent")

# Substrings that mark internal-only content (summary/detail section headers,
# model reasoning tags, raw tool-call JSON). Any outbound text containing one
# of them is replaced with the fallback reply.
_OUTBOUND_BLOCK_MARKERS = (
    "【历史记录摘要】",
    "【详细记录】",
    "【订单摘要】",
    "【订单详情】",
    "<think",
    "think_never_used",
    '[{"name":',
)

# Regex sources describing text that looks like a leaked chat transcript or
# order dump. The colon class accepts both the ASCII colon and the full-width
# colon (U+FF1A) so that Chinese-punctuated text cannot slip past the guard.
_HISTORY_LEAK_PATTERNS = [
    r'\[\d{4}-\d{2}-\d{2}[^\]]*\]\s*(客户|客服)[:\uff1a]',
    r'\[\d{2}:\d{2}:\d{2}\]\s*(客户|客服|我)[:\uff1a]',
    r'(根据|查看|查询|翻看)(历史|聊天|对话)(记录|内容)',
    r'历史(记录|对话|消息)(显示|表明|中)',
    r'之前的(聊天|对话|记录)(中|里|显示)',
    r'\d+条(历史|对话)?消息',
    r'订单号[:\uff1a]\s*\d{10,}',
    r'(状态|金额|数量)[:\uff1a].*(状态|金额|数量)[:\uff1a]',
]

# Compiled once at import time; the sanitizer runs for every outbound message.
_HISTORY_LEAK_REGEXES = tuple(re.compile(src) for src in _HISTORY_LEAK_PATTERNS)

# Neutral reply sent in place of any blocked content.
_OUTBOUND_FALLBACK_REPLY = "我在帮你看记录,稍等哈"


def _sanitize_outbound_text(content: str) -> str:
    """Return a version of *content* that is safe to send to the customer.

    Args:
        content: Outbound text about to be sent. Falsy values
            (``None``, ``""``) are accepted.

    Returns:
        ``""`` for falsy input; the stripped text unchanged when it contains
        the ``[转移会话]`` marker or triggers no rule; otherwise the fixed
        fallback reply.
    """
    if not content:
        return ""
    cleaned = str(content).strip()
    # Messages carrying the session-transfer marker bypass all checks.
    if "[转移会话]" in cleaned:
        return cleaned
    if any(marker in cleaned for marker in _OUTBOUND_BLOCK_MARKERS):
        logger.warning("[WebSocketSend] 拦截到内部内容外发,替换为安全兜底回复")
        return _OUTBOUND_FALLBACK_REPLY
    for regex in _HISTORY_LEAK_REGEXES:
        if regex.search(cleaned):
            # Only the first 30 characters of the pattern source are logged.
            logger.warning(
                "[WebSocketSend] 检测到历史记录泄露模式: %s...",
                regex.pattern[:30],
            )
            return _OUTBOUND_FALLBACK_REPLY
    return cleaned
async def send_text_flow(client, cy_id, acc_type, content): async def send_text_flow(client, cy_id, acc_type, content):
"""主动发送文本消息。""" """主动发送文本消息。"""
message = { message = {
"msg_id": "", "msg_id": "",
"acc_id": "", "acc_id": "",
"msg": content, "msg": _sanitize_outbound_text(content),
"from_id": client.reply_id, "from_id": client.reply_id,
"from_name": client.reply_id, "from_name": client.reply_id,
"cy_id": cy_id, "cy_id": cy_id,
@@ -38,10 +79,12 @@ async def send_message_flow(client, message):
"""发送消息到服务器。""" """发送消息到服务器。"""
if client.websocket and client.websocket.state == websockets.protocol.State.OPEN: if client.websocket and client.websocket.state == websockets.protocol.State.OPEN:
try: try:
payload = message if isinstance(message, dict) else {} payload = dict(message) if isinstance(message, dict) else {}
msg_json = json.dumps(message, ensure_ascii=False) if int(payload.get("msg_type", 0) or 0) == 0:
payload["msg"] = _sanitize_outbound_text(payload.get("msg", ""))
msg_json = json.dumps(payload, ensure_ascii=False)
await client.websocket.send(msg_json) await client.websocket.send(msg_json)
pretty = json.dumps(message, ensure_ascii=False, indent=2) pretty = json.dumps(payload, ensure_ascii=False, indent=2)
client.logger.info(f"[{client.get_time()}] 发送成功:\n{pretty}") client.logger.info(f"[{client.get_time()}] 发送成功:\n{pretty}")
client._activity_log( client._activity_log(
"send_message_success", "send_message_success",

View File

@@ -77,6 +77,22 @@ def _build_processing_prompt(intent: str, requirement_text: str, analysis: Dict)
return f"根据客户需求“{req or '找原图'}”,严格参考原图元素与构图,生成完整干净的高质量素材图。" return f"根据客户需求“{req or '找原图'}”,严格参考原图元素与构图,生成完整干净的高质量素材图。"
def _build_upload_title(intent: str, analysis: Dict, requirement_text: str, idx: int) -> str:
    """Compose the upload title as ``<base>_<action>_<idx>``.

    ``<base>`` joins, with ``_``, the first two non-empty values among the
    ``subject`` and ``proc_type`` entries of *analysis* and *requirement_text*,
    each passed through ``_safe_name`` (defined elsewhere in this module;
    presumably it sanitizes text for use in a name). When all three are
    empty the base is ``图片识别结果``. ``<action>`` is ``修复`` for the
    ``"repair"`` intent and ``原图`` for any other intent.
    """
    info = analysis or {}
    # Candidate order sets priority: subject, then proc_type, then requirement.
    candidates = (
        _safe_name(str(info.get("subject") or ""), ""),
        _safe_name(str(info.get("proc_type") or ""), ""),
        _safe_name(str(requirement_text or ""), ""),
    )
    named = [candidate for candidate in candidates if candidate]
    stem = "_".join(named[:2]) if named else "图片识别结果"
    suffix = "修复" if intent == "repair" else "原图"
    return f"{stem}_{suffix}_{idx}"
class AutoImagePipelineService: class AutoImagePipelineService:
def __init__(self): def __init__(self):
self.customer_db = CustomerDatabase() self.customer_db = CustomerDatabase()
@@ -244,7 +260,7 @@ class AutoImagePipelineService:
digest = hashlib.md5(f"{customer_id}|{acc_id}|{image_url}".encode("utf-8")).hexdigest()[:10] digest = hashlib.md5(f"{customer_id}|{acc_id}|{image_url}".encode("utf-8")).hexdigest()[:10]
input_path = pipeline_root / f"{digest}_src{_suffix_from_url(image_url)}" input_path = pipeline_root / f"{digest}_src{_suffix_from_url(image_url)}"
output_path = pipeline_root / f"{digest}_out.png" output_path = pipeline_root / f"{digest}_out.png"
title = f"{_safe_name(customer_id, '客户')}_{'修复' if intent == 'repair' else '原图'}_{idx}" title = _build_upload_title(intent, analysis, requirement_text, idx)
prompt = _build_processing_prompt(intent, requirement_text, analysis) prompt = _build_processing_prompt(intent, requirement_text, analysis)
task_id = task_db.add_task( task_id = task_db.add_task(
customer_id=customer_id, customer_id=customer_id,