"""Download one test image from img.alicdn.com, retrying with different Referers.

The script requests ``TEST_URL`` with browser-like headers, trying a short
list of ``Referer`` values in turn, and saves the first successful response
under ``OUTPUT_DIR``.
"""

import asyncio
from pathlib import Path
from typing import Mapping, Optional

import httpx

TEST_URL = "https://img.alicdn.com/imgextra/i1/O1CN01959PmC2MK7jvMhqXF_!!4611686018427385312-0-amp.jpg"
# Output goes next to the parent directory of the directory holding this file.
OUTPUT_DIR = Path(__file__).resolve().parents[1] / "tmp_alicdn_download"

# Maps the response media type (parameters stripped, lower-cased) to the
# file suffix used when saving; unknown types fall back to ".bin".
CONTENT_TYPE_TO_SUFFIX = {
    "image/jpeg": ".jpg",
    "image/jpg": ".jpg",
    "image/png": ".png",
    "image/webp": ".webp",
    "image/avif": ".avif",
}

# Baseline request headers. Treated as read-only: callers that need a
# different Referer build a copy instead of mutating this dict.
DEFAULT_HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/133.0.0.0 Safari/537.36"
    ),
    "Accept": "image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8",
    "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
    "Cache-Control": "no-cache",
    "Pragma": "no-cache",
    "Referer": "https://www.taobao.com/",
}


async def download_once(
    client: httpx.AsyncClient,
    url: str,
    headers: Optional[Mapping[str, str]] = None,
) -> None:
    """Fetch *url* once and write the response body into ``OUTPUT_DIR``.

    Args:
        client: The async HTTP client used to issue the GET request.
        url: Address of the resource to download.
        headers: Request headers to send. When ``None`` the module-level
            ``DEFAULT_HEADERS`` are used, which matches the behavior of
            calling this function with only ``client`` and ``url``.

    Raises:
        httpx.HTTPStatusError: Raised by ``response.raise_for_status()``
            when the server answers with an error status.
        OSError: If the output directory or file cannot be written.
    """
    request_headers = DEFAULT_HEADERS if headers is None else headers
    response = await client.get(url, headers=request_headers)
    print(f"HTTP {response.status_code}")

    # Drop any parameters such as "; charset=..." so the lookup key is
    # just the bare media type.
    content_type = (
        response.headers.get("content-type", "").split(";", 1)[0].strip().lower()
    )
    print(f"Content-Type: {content_type}")

    if response.status_code != 200:
        # Show the start of the body to help diagnose the failure before
        # raise_for_status() turns an error status into an exception.
        print(response.text[:300])
    response.raise_for_status()

    suffix = CONTENT_TYPE_TO_SUFFIX.get(content_type, ".bin")
    output_path = OUTPUT_DIR / f"alicdn_test{suffix}"
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_bytes(response.content)
    print(f"Saved to: {output_path}")
    print(f"Size: {output_path.stat().st_size} bytes")


async def main() -> None:
    """Try to download ``TEST_URL`` once per candidate Referer.

    Returns as soon as one attempt succeeds.

    Raises:
        RuntimeError: If every attempt fails; the last underlying
            exception is attached as ``__cause__``.
    """
    timeout = httpx.Timeout(60.0, connect=20.0)
    referers = (
        "https://www.taobao.com/",
        "https://item.taobao.com/",
        "https://detail.tmall.com/",
    )
    last_error = None

    async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client:
        for idx, referer in enumerate(referers, 1):
            # Build a per-attempt copy so the shared DEFAULT_HEADERS
            # constant is never mutated.
            headers = {**DEFAULT_HEADERS, "Referer": referer}
            print(f"Attempt {idx} with Referer={referer}")
            try:
                await download_once(client, TEST_URL, headers=headers)
            except Exception as e:
                # Deliberately broad: this is the script's retry boundary,
                # and any failure should move on to the next Referer.
                last_error = e
                print(f"Attempt {idx} failed: {type(e).__name__}: {e}")
                # Pause between attempts only; no point waiting after the
                # final one.
                if idx < len(referers):
                    await asyncio.sleep(1)
            else:
                print("Download success")
                return

    raise RuntimeError(f"All attempts failed: {last_error}") from last_error


if __name__ == "__main__":
    asyncio.run(main())