"""Rule-based pre-filtering and coarse intent detection for shop chat messages.

Everything here is plain keyword / regex matching; no I/O is performed.
"""

import re
from dataclasses import dataclass

# Matches http(s) URLs that end in a common image extension, optionally
# followed by a query string.
IMAGE_URL_RE = re.compile(
    r"https?://[^\s]+(?:\.jpg|\.jpeg|\.png|\.webp|\.bmp|\.gif)(?:\?[^\s]*)?",
    re.I,
)

# "<w><unit> x <h><unit>" where the unit is 米/m/M. Groups 1 and 3 hold the
# numbers. The separator class also accepts the multiplication sign "×".
SIZE_RE = re.compile(
    r"(\d+(?:\.\d+)?)\s*(米|m|M)\s*[xX*×乘]\s*(\d+(?:\.\d+)?)\s*(米|m|M)"
)

MAP_POLITICAL_KWS = ["地图", "国界", "边界", "南海", "台湾", "香港", "澳门", "西藏", "新疆", "政治"]
PORN_RISK_KWS = ["裸", "成人视频", "性爱", "激情"]
EXTERNAL_CONTACT_KWS = ["微信", "vx", "qq", "手机号", "电话", "加我", "私下"]
PRICE_KWS = ["多少钱", "怎么收费", "报价", "价格", "多少米", "多少"]
GREETING_KWS = ["你好", "您好", "在吗", "在不在", "hello", "hi"]
FINISH_KWS = ["发完了", "没了", "就这些", "报价吧", "可以报价", "先这样"]
NONSENSE_KWS = ["嗯", "哦", "好的", "ok", "1", "收到"]

# Marker that identifies a shop-entry card, and the separator that may sit
# between the customer's own text and the card.
_SHOP_CARD_MARKER = "[进店卡片]"
_SHOP_CARD_SEPARATOR = "#*#"


@dataclass
class RuleResult:
    """Outcome of :func:`prefilter_message`."""

    # True when the message should be dropped without further processing.
    ignore: bool = False
    # Cleaned-up message text to process when ``ignore`` is False.
    normalized_msg: str = ""
    # Short machine-readable tag naming the rule that produced this result.
    reason: str = ""


def _contains_keyword(text: str, keyword: str) -> bool:
    """Return True if *keyword* occurs in *text* (case-insensitively).

    Keywords made only of ASCII letters (e.g. ``"hi"``) must not be embedded
    in a longer run of ASCII letters, so ``"hi"`` does not match ``"white"``.
    Any other keyword is matched as a plain substring.
    """
    if not (keyword.isascii() and keyword.isalpha()):
        return keyword.lower() in text.lower()
    pattern = rf"(?<![a-z]){re.escape(keyword)}(?![a-z])"
    return re.search(pattern, text, re.I) is not None


def extract_customer_text_from_shop_card(msg: str) -> str:
    """Return the customer-typed text that precedes a shop-entry card.

    Returns ``""`` when *msg* has no card marker or when nothing but
    whitespace / the ``#*#`` separator precedes the marker.
    """
    text = msg or ""
    if _SHOP_CARD_MARKER not in text:
        return ""
    # Split on the marker itself so a card that arrives without the "#*#"
    # separator is not mistaken for customer text.
    before = text.partition(_SHOP_CARD_MARKER)[0].strip()
    if before.endswith(_SHOP_CARD_SEPARATOR):
        before = before[: -len(_SHOP_CARD_SEPARATOR)].strip()
    return before


def detect_order_status(order_text: str) -> str:
    """Map order text to ``paid`` / ``pending_payment`` / ``refund`` / ``unknown``.

    Checks run in that order; the first matching status wins.
    """
    t = order_text or ""
    if "买家已付款" in t:
        return "paid"
    if "等待买家付款" in t or "待付款" in t:
        return "pending_payment"
    # "退款" also covers "已退款".
    if "退款" in t:
        return "refund"
    return "unknown"


def extract_size_pairs_m(msg: str) -> list[tuple[float, float]]:
    """Return every ``(first, second)`` number pair matched by ``SIZE_RE`` in *msg*."""
    return [
        (float(match.group(1)), float(match.group(3)))
        for match in SIZE_RE.finditer(msg or "")
    ]


def has_map_or_political_risk(msg: str, goods_name: str = "") -> bool:
    """Return True if *msg* or *goods_name* contains a ``MAP_POLITICAL_KWS`` entry."""
    t = f"{msg or ''} {goods_name or ''}".lower()
    return any(k.lower() in t for k in MAP_POLITICAL_KWS)


def has_porn_risk(msg: str) -> bool:
    """Return True if *msg* contains a ``PORN_RISK_KWS`` entry."""
    t = (msg or "").lower()
    return any(k.lower() in t for k in PORN_RISK_KWS)


def requests_external_contact(msg: str) -> bool:
    """Return True if *msg* contains an ``EXTERNAL_CONTACT_KWS`` entry."""
    t = (msg or "").lower()
    return any(k.lower() in t for k in EXTERNAL_CONTACT_KWS)


def is_meaningless_short(msg: str) -> bool:
    """Return True for messages of at most two characters or listed in ``NONSENSE_KWS``.

    The message is stripped and lower-cased before the check.
    """
    t = (msg or "").strip().lower()
    if len(t) <= 2:
        return True
    return t in NONSENSE_KWS


def prefilter_message(msg: str, msg_type: int) -> RuleResult:
    """Decide whether a raw incoming message should be processed or ignored.

    Ignored: empty messages, message types other than 0 or 1, transfer
    notices, the system echo line, and shop-entry cards that carry no
    customer text. For a card with customer text, ``normalized_msg`` is that
    text alone; otherwise it is the stripped message.
    """
    m = (msg or "").strip()
    if not m:
        return RuleResult(ignore=True, reason="empty")
    if msg_type not in (0, 1):
        return RuleResult(ignore=True, reason="unsupported_msg_type")
    if "由 " in m and " 转交给 " in m:
        return RuleResult(ignore=True, reason="transfer_notice")
    if "Gemini 店铺消息,跳过" in m:
        return RuleResult(ignore=True, reason="system_echo")
    if _SHOP_CARD_MARKER in m:
        customer_text = extract_customer_text_from_shop_card(m)
        if customer_text:
            return RuleResult(
                ignore=False,
                normalized_msg=customer_text,
                reason="shop_card_with_text",
            )
        return RuleResult(ignore=True, reason="pure_shop_card")
    return RuleResult(ignore=False, normalized_msg=m, reason="normal")


def detect_intent(msg: str) -> str:
    """Classify *msg* into a coarse intent label; the first matching rule wins.

    Order: ``image``, ``finish_or_quote_trigger``, ``pricing``,
    ``external_contact``, ``greeting``, ``nonsense``, then ``unknown``.
    """
    m = (msg or "").lower()
    if IMAGE_URL_RE.search(m):
        return "image"
    if any(k in m for k in FINISH_KWS):
        return "finish_or_quote_trigger"
    if any(k in m for k in PRICE_KWS):
        return "pricing"
    # Checked before greetings so that a leading "你好" cannot hide a request
    # to move the conversation off-platform.
    if requests_external_contact(m):
        return "external_contact"
    if any(_contains_keyword(m, k) for k in GREETING_KWS):
        return "greeting"
    if is_meaningless_short(m):
        return "nonsense"
    return "unknown"


def extract_image_urls(msg: str) -> list[str]:
    """Return all image URLs in *msg*, in order of appearance."""
    return IMAGE_URL_RE.findall(msg or "")


def rules_prompt() -> str:
    """Return the fixed instruction prompt text describing the JSON action format."""
    return (
        "你是淘宝客服主决策。只输出JSON动作,不要解释。\n"
        "动作 action 只能是: reply / quote / transfer / noop。\n"
        "规则提炼(严格执行):\n"
        "1) 客户发图: 先承接, 允许继续收图。\n"
        "2) 询价且有图(当前/待处理): 优先 quote。\n"
        "3) 无图询价: reply 承接并引导发图。\n"
        "4) 客户说发完了/报价吧/没图: 若有待处理图则 quote。\n"
        "5) 外部联系方式请求: reply 站内引导, 不给微信QQ手机号。\n"
        "6) 地图/政治/黄暴风险: transfer 或拒绝性 reply。\n"
        "7) 仅无意义短句(嗯/哦/ok): 给简短自然承接, 不要长回复。\n"
        "8) 避免重复同一句; 若上句语义相同则换表达。\n"
        "9) 订单已付款: 可回复已安排; 待付款: 提示先付款。\n"
        "10) 尺寸明显超大(如>=2m*2m): 提示需补图/重做边缘, 不要直接承诺一模一样。\n"
        "11) 店铺差异化: 按 acc_id/persona 口吻回复, 保持真人聊天。\n"
        "12) 最终输出只允许一个动作, 不能混合。\n"
        "13) reply 必须很短: 1句为主, 不超过20字, 口语化, 不要客服官话和AI腔。\n"
        "输出格式:\n"
        '{"action":"reply|quote|transfer|noop","reply":"","transfer_msg":"","quote_mode":"flush_pending|analyze_current_or_recent|collect_only","reason":""}'
    )