fix: harden outbound leak guard and title naming
This commit is contained in:
@@ -1,5 +1,6 @@
|
|||||||
import logging
|
import logging
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import re
|
||||||
from typing import Optional, List, Any
|
from typing import Optional, List, Any
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from db.customer_db import db as customer_db
|
from db.customer_db import db as customer_db
|
||||||
@@ -8,6 +9,43 @@ from db.chat_log_db import log_message, get_conversation
|
|||||||
|
|
||||||
logger = logging.getLogger("cs_agent")

# Reply stored in place of any outbound text that trips the leak guard.
_OUTBOUND_FALLBACK_REPLY = "我在帮你看记录,稍等哈"

# Literal substrings that cause an outbound text to be replaced outright.
_OUTBOUND_BLOCK_MARKERS = (
    "【历史记录摘要】",
    "【详细记录】",
    "【订单摘要】",
    "【订单详情】",
    "<think",
    "think_never_used",
    '[{"name":',
)

# Regex sources for history/order leak detection.
# The colon class is written as [:\uFF1A] so that both the ASCII colon and the
# fullwidth colon (U+FF1A) are matched; a class listing ":" twice only ever
# matched the ASCII form.
_HISTORY_LEAK_PATTERNS = [
    r'\[\d{4}-\d{2}-\d{2}[^\]]*\]\s*(客户|客服)[:\uFF1A]',
    r'\[\d{2}:\d{2}:\d{2}\]\s*(客户|客服|我)[:\uFF1A]',
    r'(根据|查看|查询|翻看)(历史|聊天|对话)(记录|内容)',
    r'历史(记录|对话|消息)(显示|表明|中)',
    r'之前的(聊天|对话|记录)(中|里|显示)',
    r'共\d+条(历史|对话)?消息',
    r'订单号[:\uFF1A]\s*\d{10,}',
    # (?s) lets ".*" cross line breaks, so two labelled fields placed on
    # separate lines are detected as well.
    r'(?s)(状态|金额|数量)[:\uFF1A].*(状态|金额|数量)[:\uFF1A]',
]

# Compiled once at import time instead of on every call.
_HISTORY_LEAK_REGEXES = tuple(re.compile(source) for source in _HISTORY_LEAK_PATTERNS)


def _sanitize_outbound_archive_text(content: str) -> str:
    """Return a version of an outbound text that is safe to archive.

    Args:
        content: Outbound message text; any falsy value is treated as empty.

    Returns:
        ``""`` for falsy input. Otherwise the stripped text, unless it contains
        one of ``_OUTBOUND_BLOCK_MARKERS`` or matches one of
        ``_HISTORY_LEAK_PATTERNS``, in which case the fixed fallback reply is
        returned and a warning is logged. Text containing ``[转移会话]`` is
        returned stripped without any further checks.
    """
    if not content:
        return ""

    cleaned = str(content).strip()

    # Messages carrying the transfer marker bypass the guard entirely.
    # NOTE(review): this is a substring test, so the marker anywhere in the
    # text disables every check below - confirm that is intended.
    if "[转移会话]" in cleaned:
        return cleaned

    if any(marker in cleaned for marker in _OUTBOUND_BLOCK_MARKERS):
        logger.warning("[Repository] 拦截到内部内容写入外发记录,替换为安全兜底回复")
        return _OUTBOUND_FALLBACK_REPLY

    for regex in _HISTORY_LEAK_REGEXES:
        if regex.search(cleaned):
            # Only the first 30 characters of the pattern are logged.
            logger.warning(
                "[Repository] 检测到历史记录泄露模式,拦截出站入库: %s...",
                regex.pattern[:30],
            )
            return _OUTBOUND_FALLBACK_REPLY

    return cleaned
|
||||||
|
|
||||||
class DataRepository:
|
class DataRepository:
|
||||||
"""
|
"""
|
||||||
异步数据仓库:使用 asyncio.to_thread 屏蔽底层同步 IO 阻塞。
|
异步数据仓库:使用 asyncio.to_thread 屏蔽底层同步 IO 阻塞。
|
||||||
@@ -29,6 +67,8 @@ class DataRepository:
|
|||||||
msg_type: int = 0,
|
msg_type: int = 0,
|
||||||
):
|
):
|
||||||
"""异步持久化存储聊天记录"""
|
"""异步持久化存储聊天记录"""
|
||||||
|
if direction == "out" and int(msg_type or 0) == 0:
|
||||||
|
content = _sanitize_outbound_archive_text(content)
|
||||||
# 将图片URL列表转为\n分隔的字符串
|
# 将图片URL列表转为\n分隔的字符串
|
||||||
urls_str = "\n".join(image_urls) if image_urls else ""
|
urls_str = "\n".join(image_urls) if image_urls else ""
|
||||||
return await asyncio.to_thread(
|
return await asyncio.to_thread(
|
||||||
|
|||||||
@@ -1,13 +1,54 @@
|
|||||||
import json
|
import json
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
import websockets
|
import websockets
|
||||||
|
|
||||||
|
logger = logging.getLogger("cs_agent")

# Reply sent in place of any outbound text that trips the leak guard.
_OUTBOUND_FALLBACK_REPLY = "我在帮你看记录,稍等哈"

# Literal substrings that cause an outbound text to be replaced outright.
_OUTBOUND_BLOCK_MARKERS = (
    "【历史记录摘要】",
    "【详细记录】",
    "【订单摘要】",
    "【订单详情】",
    "<think",
    "think_never_used",
    '[{"name":',
)

# Regex sources for history/order leak detection.
# The colon class is written as [:\uFF1A] so that both the ASCII colon and the
# fullwidth colon (U+FF1A) are matched; a class listing ":" twice only ever
# matched the ASCII form.
_HISTORY_LEAK_PATTERNS = [
    r'\[\d{4}-\d{2}-\d{2}[^\]]*\]\s*(客户|客服)[:\uFF1A]',
    r'\[\d{2}:\d{2}:\d{2}\]\s*(客户|客服|我)[:\uFF1A]',
    r'(根据|查看|查询|翻看)(历史|聊天|对话)(记录|内容)',
    r'历史(记录|对话|消息)(显示|表明|中)',
    r'之前的(聊天|对话|记录)(中|里|显示)',
    r'共\d+条(历史|对话)?消息',
    r'订单号[:\uFF1A]\s*\d{10,}',
    # (?s) lets ".*" cross line breaks, so two labelled fields placed on
    # separate lines are detected as well.
    r'(?s)(状态|金额|数量)[:\uFF1A].*(状态|金额|数量)[:\uFF1A]',
]

# Compiled once at import time instead of on every send.
_HISTORY_LEAK_REGEXES = tuple(re.compile(source) for source in _HISTORY_LEAK_PATTERNS)


def _sanitize_outbound_text(content: str) -> str:
    """Return a version of an outbound text that is safe to send.

    Args:
        content: Outbound message text; any falsy value is treated as empty.

    Returns:
        ``""`` for falsy input. Otherwise the stripped text, unless it contains
        one of ``_OUTBOUND_BLOCK_MARKERS`` or matches one of
        ``_HISTORY_LEAK_PATTERNS``, in which case the fixed fallback reply is
        returned and a warning is logged. Text containing ``[转移会话]`` is
        returned stripped without any further checks.
    """
    if not content:
        return ""

    cleaned = str(content).strip()

    # Messages carrying the transfer marker bypass the guard entirely.
    # NOTE(review): this is a substring test, so the marker anywhere in the
    # text disables every check below - confirm that is intended.
    if "[转移会话]" in cleaned:
        return cleaned

    if any(marker in cleaned for marker in _OUTBOUND_BLOCK_MARKERS):
        logger.warning("[WebSocketSend] 拦截到内部内容外发,替换为安全兜底回复")
        return _OUTBOUND_FALLBACK_REPLY

    for regex in _HISTORY_LEAK_REGEXES:
        if regex.search(cleaned):
            # Only the first 30 characters of the pattern are logged.
            logger.warning(
                "[WebSocketSend] 检测到历史记录泄露模式: %s...",
                regex.pattern[:30],
            )
            return _OUTBOUND_FALLBACK_REPLY

    return cleaned
|
||||||
|
|
||||||
|
|
||||||
async def send_text_flow(client, cy_id, acc_type, content):
|
async def send_text_flow(client, cy_id, acc_type, content):
|
||||||
"""主动发送文本消息。"""
|
"""主动发送文本消息。"""
|
||||||
message = {
|
message = {
|
||||||
"msg_id": "",
|
"msg_id": "",
|
||||||
"acc_id": "",
|
"acc_id": "",
|
||||||
"msg": content,
|
"msg": _sanitize_outbound_text(content),
|
||||||
"from_id": client.reply_id,
|
"from_id": client.reply_id,
|
||||||
"from_name": client.reply_id,
|
"from_name": client.reply_id,
|
||||||
"cy_id": cy_id,
|
"cy_id": cy_id,
|
||||||
@@ -38,10 +79,12 @@ async def send_message_flow(client, message):
|
|||||||
"""发送消息到服务器。"""
|
"""发送消息到服务器。"""
|
||||||
if client.websocket and client.websocket.state == websockets.protocol.State.OPEN:
|
if client.websocket and client.websocket.state == websockets.protocol.State.OPEN:
|
||||||
try:
|
try:
|
||||||
payload = message if isinstance(message, dict) else {}
|
payload = dict(message) if isinstance(message, dict) else {}
|
||||||
msg_json = json.dumps(message, ensure_ascii=False)
|
if int(payload.get("msg_type", 0) or 0) == 0:
|
||||||
|
payload["msg"] = _sanitize_outbound_text(payload.get("msg", ""))
|
||||||
|
msg_json = json.dumps(payload, ensure_ascii=False)
|
||||||
await client.websocket.send(msg_json)
|
await client.websocket.send(msg_json)
|
||||||
pretty = json.dumps(message, ensure_ascii=False, indent=2)
|
pretty = json.dumps(payload, ensure_ascii=False, indent=2)
|
||||||
client.logger.info(f"[{client.get_time()}] 发送成功:\n{pretty}")
|
client.logger.info(f"[{client.get_time()}] 发送成功:\n{pretty}")
|
||||||
client._activity_log(
|
client._activity_log(
|
||||||
"send_message_success",
|
"send_message_success",
|
||||||
|
|||||||
@@ -77,6 +77,22 @@ def _build_processing_prompt(intent: str, requirement_text: str, analysis: Dict)
|
|||||||
return f"根据客户需求“{req or '找原图'}”,严格参考原图元素与构图,生成完整干净的高质量素材图。"
|
return f"根据客户需求“{req or '找原图'}”,严格参考原图元素与构图,生成完整干净的高质量素材图。"
|
||||||
|
|
||||||
|
|
||||||
|
def _build_upload_title(intent: str, analysis: Dict, requirement_text: str, idx: int) -> str:
    """Compose the upload title for a processed image.

    The title has the form ``<base>_<action>_<idx>``:

    * ``base`` joins, with ``_``, the first two non-empty values among the
      analysis ``subject``, the analysis ``proc_type`` and the requirement
      text (in that order), each passed through ``_safe_name`` with an empty
      fallback. When all three come back empty, ``图片识别结果`` is used.
    * ``action`` is ``修复`` when ``intent`` equals ``"repair"``, else ``原图``.

    Args:
        intent: Processing intent; only the value ``"repair"`` is special-cased.
        analysis: Mapping read for ``subject`` and ``proc_type``; a falsy value
            is treated as an empty mapping.
        requirement_text: Free-form requirement text; falsy values count as empty.
        idx: Index appended as the last title segment.

    Returns:
        The assembled title string.
    """
    info = analysis or {}

    # _safe_name is defined elsewhere in the module; presumably it normalises
    # text into a name-safe form - verify against its definition.
    candidates = (
        _safe_name(str(info.get("subject") or ""), ""),
        _safe_name(str(info.get("proc_type") or ""), ""),
        _safe_name(str(requirement_text or ""), ""),
    )
    usable = list(filter(None, candidates))

    # At most two descriptive segments are kept in the title.
    base = "_".join(usable[:2]) if usable else "图片识别结果"
    suffix = "修复" if intent == "repair" else "原图"

    return f"{base}_{suffix}_{idx}"
|
||||||
|
|
||||||
|
|
||||||
class AutoImagePipelineService:
|
class AutoImagePipelineService:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.customer_db = CustomerDatabase()
|
self.customer_db = CustomerDatabase()
|
||||||
@@ -244,7 +260,7 @@ class AutoImagePipelineService:
|
|||||||
digest = hashlib.md5(f"{customer_id}|{acc_id}|{image_url}".encode("utf-8")).hexdigest()[:10]
|
digest = hashlib.md5(f"{customer_id}|{acc_id}|{image_url}".encode("utf-8")).hexdigest()[:10]
|
||||||
input_path = pipeline_root / f"{digest}_src{_suffix_from_url(image_url)}"
|
input_path = pipeline_root / f"{digest}_src{_suffix_from_url(image_url)}"
|
||||||
output_path = pipeline_root / f"{digest}_out.png"
|
output_path = pipeline_root / f"{digest}_out.png"
|
||||||
title = f"{_safe_name(customer_id, '客户')}_{'修复' if intent == 'repair' else '原图'}_{idx}"
|
title = _build_upload_title(intent, analysis, requirement_text, idx)
|
||||||
prompt = _build_processing_prompt(intent, requirement_text, analysis)
|
prompt = _build_processing_prompt(intent, requirement_text, analysis)
|
||||||
task_id = task_db.add_task(
|
task_id = task_db.add_task(
|
||||||
customer_id=customer_id,
|
customer_id=customer_id,
|
||||||
|
|||||||
Reference in New Issue
Block a user