fix: ignore malformed image urls from card payloads
This commit is contained in:
@@ -136,10 +136,27 @@ class QianniuAdapter(BaseAdapter):
|
||||
logger.error(f"[QianniuAdapter] 发送失败: {e}")
|
||||
|
||||
def _extract_urls(self, text: str) -> List[str]:
|
||||
if not text: return []
|
||||
if not text:
|
||||
return []
|
||||
image_exts = (".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp")
|
||||
candidates = re.findall(r'https?://[^\s#]+', text)
|
||||
return [u for u in candidates if any(ext in u.lower() for ext in image_exts)]
|
||||
candidates = re.findall(r'https?://[^\s#,"\'}\]]+', text)
|
||||
urls: List[str] = []
|
||||
seen = set()
|
||||
|
||||
for candidate in candidates:
|
||||
url = str(candidate or "").strip().rstrip('\'".,;:!?)')
|
||||
lower = url.lower()
|
||||
if not any(ext in lower for ext in image_exts):
|
||||
continue
|
||||
# 过滤被卡片/JSON 串污染的伪图片链接
|
||||
if any(marker in lower for marker in ("%22title%22", "%22topic%22", '"title":', '"topic":', "%7d")):
|
||||
continue
|
||||
if url in seen:
|
||||
continue
|
||||
seen.add(url)
|
||||
urls.append(url)
|
||||
|
||||
return urls
|
||||
|
||||
@staticmethod
|
||||
def _safe_int(value: Any, default: int = 0) -> int:
|
||||
|
||||
Reference in New Issue
Block a user