# Files
# tw/scripts/test_alicdn_download.py
#
# 75 lines
# 2.4 KiB
# Python

import asyncio
from pathlib import Path
import httpx
# Image URL that the download attempts fetch.
TEST_URL = "https://img.alicdn.com/imgextra/i1/O1CN01959PmC2MK7jvMhqXF_!!4611686018427385312-0-amp.jpg"

# Output directory: "tmp_alicdn_download" placed one level above the
# directory that contains this file.
OUTPUT_DIR = Path(__file__).resolve().parents[1] / "tmp_alicdn_download"

# Maps a response media type (lower-case, parameters stripped) to the file
# suffix used when saving the body.
CONTENT_TYPE_TO_SUFFIX = {
    "image/jpeg": ".jpg",
    "image/jpg": ".jpg",
    "image/png": ".png",
    "image/webp": ".webp",
    "image/avif": ".avif",
}

# Request headers sent with every download. The User-Agent is a desktop
# Chrome string; the "Referer" entry is the initial value and is overwritten
# by main() before each attempt.
DEFAULT_HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/133.0.0.0 Safari/537.36"
    ),
    "Accept": "image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8",
    "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
    "Cache-Control": "no-cache",
    "Pragma": "no-cache",
    "Referer": "https://www.taobao.com/",
}
async def download_once(client: httpx.AsyncClient, url: str):
    """Fetch *url* once and save the response body under ``OUTPUT_DIR``.

    The request is sent with the module-level ``DEFAULT_HEADERS``. The status
    code and the media type (the ``content-type`` header with any parameters
    removed, lower-cased) are printed. The body is written to
    ``OUTPUT_DIR / "alicdn_test<suffix>"``, where the suffix is looked up in
    ``CONTENT_TYPE_TO_SUFFIX`` and falls back to ``.bin`` for unknown types.

    Args:
        client: The ``httpx.AsyncClient`` used to issue the GET request.
        url: Address of the resource to download.

    Raises:
        Whatever ``response.raise_for_status()`` raises when the status is
        not 200 and the call treats it as a failure. A non-200 status that
        ``raise_for_status()`` accepts falls through and its body is saved.
    """
    resp = await client.get(url, headers=DEFAULT_HEADERS)
    status = resp.status_code
    print(f"HTTP {status}")

    # Keep only the media type, dropping parameters such as "; charset=...".
    raw_type = resp.headers.get("content-type", "")
    mime = raw_type.partition(";")[0].strip().lower()
    print(f"Content-Type: {mime}")

    if status != 200:
        # Show the start of the body to help diagnose the failure.
        print(resp.text[:300])
        resp.raise_for_status()

    extension = CONTENT_TYPE_TO_SUFFIX.get(mime, ".bin")
    target = OUTPUT_DIR / f"alicdn_test{extension}"
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_bytes(resp.content)

    print(f"Saved to: {target}")
    print(f"Size: {target.stat().st_size} bytes")
async def main() -> None:
    """Download ``TEST_URL``, retrying with a different Referer on failure.

    One ``httpx.AsyncClient`` (60 s default timeout, 20 s connect timeout,
    redirects followed) is shared by all attempts. Each candidate Referer is
    tried in order; the function returns after the first successful
    ``download_once`` call. Failed attempts are printed and followed by a
    one-second pause before the next attempt.

    Raises:
        RuntimeError: If every attempt failed. The last failure is chained
            as ``__cause__`` so its traceback is preserved.
    """
    referers = [
        "https://www.taobao.com/",
        "https://item.taobao.com/",
        "https://detail.tmall.com/",
    ]
    timeout = httpx.Timeout(60.0, connect=20.0)
    last_error = None

    # download_once() sends the module-level DEFAULT_HEADERS, so the Referer
    # has to be swapped in that shared dict. Remember the original value and
    # put it back afterwards so the constant is not left modified.
    original_referer = DEFAULT_HEADERS.get("Referer")
    try:
        async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client:
            for idx, referer in enumerate(referers, 1):
                DEFAULT_HEADERS["Referer"] = referer
                print(f"Attempt {idx} with Referer={referer}")
                try:
                    await download_once(client, TEST_URL)
                except Exception as e:
                    # Deliberately broad: any failure (network, HTTP status,
                    # file write) just moves on to the next Referer.
                    last_error = e
                    print(f"Attempt {idx} failed: {type(e).__name__}: {e}")
                    # No point in waiting once there is nothing left to try.
                    if idx < len(referers):
                        await asyncio.sleep(1)
                else:
                    print("Download success")
                    return
    finally:
        if original_referer is None:
            DEFAULT_HEADERS.pop("Referer", None)
        else:
            DEFAULT_HEADERS["Referer"] = original_referer

    raise RuntimeError(f"All attempts failed: {last_error}") from last_error
if __name__ == "__main__":
    # Run the download test only when executed as a script, not on import.
    asyncio.run(main())