def _extract_urls(self, text: str) -> List[str]:
    """Return the distinct image URLs found in *text*, in first-seen order.

    A candidate is any ``http://`` / ``https://`` run that stops at
    whitespace, ``#``, ``,``, quotes, ``}`` or ``]``. Trailing punctuation
    is trimmed, and a candidate is kept only when:

    * an image extension appears in its path or query (the host name is
      ignored, so ``www.giftcard.com`` is not mistaken for a ``.gif``);
    * it carries no sign of an embedded card/JSON payload.

    Args:
        text: Raw message text; may be empty or ``None``.

    Returns:
        De-duplicated image URLs with their original casing preserved.
    """
    if not text:
        return []

    image_exts = (".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp")
    # Percent-encoded leftovers of a card/JSON blob glued onto the link.
    # Raw '"title":' style markers need no check: the candidate pattern
    # below already stops at the first quote character.
    polluted_markers = ("%22title%22", "%22topic%22", "%7d")

    urls: List[str] = []
    seen = set()
    for candidate in re.findall(r'https?://[^\s#,"\'}\]]+', text):
        # Drop sentence punctuation that got glued to the end of the link.
        url = candidate.rstrip('\'".,;:!?)')
        lower = url.lower()

        # Look for the extension only after the authority part, otherwise
        # hosts such as "cdn.pngtree.com" would pass as image links.
        resource = re.sub(r'^https?://[^/?]*', '', lower, count=1)
        if not any(ext in resource for ext in image_exts):
            continue

        # Filter out pseudo image links polluted by card/JSON strings.
        if any(marker in lower for marker in polluted_markers):
            continue

        if url in seen:
            continue
        seen.add(url)
        urls.append(url)

    return urls