fix: ignore malformed image urls from card payloads
This commit is contained in:
@@ -136,10 +136,27 @@ class QianniuAdapter(BaseAdapter):
|
|||||||
logger.error(f"[QianniuAdapter] 发送失败: {e}")
|
logger.error(f"[QianniuAdapter] 发送失败: {e}")
|
||||||
|
|
||||||
def _extract_urls(self, text: str) -> List[str]:
|
def _extract_urls(self, text: str) -> List[str]:
|
||||||
if not text: return []
|
if not text:
|
||||||
|
return []
|
||||||
image_exts = (".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp")
|
image_exts = (".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp")
|
||||||
candidates = re.findall(r'https?://[^\s#]+', text)
|
candidates = re.findall(r'https?://[^\s#,"\'}\]]+', text)
|
||||||
return [u for u in candidates if any(ext in u.lower() for ext in image_exts)]
|
urls: List[str] = []
|
||||||
|
seen = set()
|
||||||
|
|
||||||
|
for candidate in candidates:
|
||||||
|
url = str(candidate or "").strip().rstrip('\'".,;:!?)')
|
||||||
|
lower = url.lower()
|
||||||
|
if not any(ext in lower for ext in image_exts):
|
||||||
|
continue
|
||||||
|
# 过滤被卡片/JSON 串污染的伪图片链接
|
||||||
|
if any(marker in lower for marker in ("%22title%22", "%22topic%22", '"title":', '"topic":', "%7d")):
|
||||||
|
continue
|
||||||
|
if url in seen:
|
||||||
|
continue
|
||||||
|
seen.add(url)
|
||||||
|
urls.append(url)
|
||||||
|
|
||||||
|
return urls
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _safe_int(value: Any, default: int = 0) -> int:
|
def _safe_int(value: Any, default: int = 0) -> int:
|
||||||
|
|||||||
Reference in New Issue
Block a user