Files
tw/services/service_image_analyzer.py
2026-03-06 12:44:57 +08:00

320 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
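Image analysis service - silently analyzes images in the background, used for data calibration.
Uses the Zhipu GLM-4V vision model to analyze images sent by customers.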
"""
import os
import asyncio
import base64
import time
import json
import logging
from typing import Optional, Dict, Any
from openai import AsyncOpenAI
from dotenv import load_dotenv
import aiohttp
from PIL import Image
from io import BytesIO
# Load variables from a .env file at import time; ImageAnalyzerService.__init__
# reads its API key, base URL and model name via os.getenv.
load_dotenv()
# Logger registered under the fixed name "cs_agent" (not this module's __name__).
logger = logging.getLogger("cs_agent")
# Prompt sent to the vision model together with the image (see
# ImageAnalyzerService.analyze). It asks for one "label: value" line per field.
# ImageAnalyzerService._parse_result looks up these same Chinese labels, so the
# labels here and the keys extracted there must stay in sync.
ANALYSIS_PROMPT = """你是一个电商图片处理评估专家。
请仔细分析这张图片,输出以下字段,每行一个,不要多余内容:
敏感内容: <yes|no>
平整度: <flat|mild|rough>
含文字: <yes|no|partial>
文字量: <none|少量|大量>
含人脸: <yes|no>
人脸清晰度: <无|清晰|模糊|遮挡>
阴影: <yes|no>
复杂度: <simple|normal|complex|hard>
原因: <15字以内说明复杂度判断依据>
主体: <图片核心内容,如:印花图案/logo/人物/产品/老照片/风景/文字/其他>
类型: <处理类型,如:印花提取/高清修复/去背景/老照片修复/logo提取/人像修复/其他>
质量: <原图质量,如:清晰/轻微模糊/严重模糊/低分辨率/截图/扫描件>
分辨率评估: <足够|偏低|严重不足>
色彩: <单色|双色|多色|渐变|全彩>
主色调: <白/黑/红/蓝/绿/黄/棕/灰/多彩等>
细节层级: <简约|中等|精细|极精细>
边缘清晰度: <清晰|模糊|毛糙>
背景: <纯色|简单|复杂|透明>
可做: <yes|partial|no>
风险: <none|low|high>
透视: <no|mild|strong>
比例: <从以下选一个最合适的1:1 / 9:16 / 16:9 / 3:4 / 4:3 / 3:2 / 2:3 / 5:4 / 4:5>
预估工时: <5分钟/15分钟/30分钟/1小时/2小时以上>
难点: <具体说明处理难点,如:细节多/透视矫正/文字提取等15字内>
建议方案: <AI处理/人工精修/AI+人工/建议重拍>
提示词: <为 Gemini 写处理指令中文80字以内要详细具体>
备注: <给客服AI的特别提示没有则填无>
判断规则:
- 平整度 flat画面平整、无褶皱、无透视 → 便宜
- 平整度 mild轻微褶皱/透视 → 中等
- 平整度 rough有褶皱/透视/曲面 → 贵
- 含文字 yes有小字要精细保留
- 含人脸 yes有人脸需处理 → 加价
- 敏感内容=yes 时,可做必须填 no
- 细节层级影响工时和价格
"""
class ImageAnalyzerService:
    """Background image-analysis service that never blocks the main flow.

    Sends an image URL to an OpenAI-compatible vision model, parses the
    line-oriented ``label: value`` reply into a flat dict and derives a
    suggested price from it. Every failure path of :meth:`analyze` returns the
    :meth:`_fallback` result instead of raising.
    """

    # Seconds a cached analysis result stays valid.
    _CACHE_TTL_SECONDS = 300
    # Shared by all instances: image URL -> (result dict, time.monotonic() stamp).
    _analysis_cache: Dict[str, tuple] = {}
    # Complexity level -> (minimum price, maximum price).
    PRICE_MAP = {
        "simple": (10, 15),
        "normal": (15, 20),
        "complex": (20, 25),
        "hard": (25, 30),
    }

    def __init__(self):
        """Read the API key, endpoint and model name from the environment."""
        self.api_key = os.getenv("OPENAI_API_KEY")
        self.base_url = os.getenv("OPENAI_BASE_URL", "https://open.bigmodel.cn/api/paas/v4")
        self.vision_model = os.getenv("VISION_MODEL", "glm-4v-flash")

    def _is_url(self, path: str) -> bool:
        """Return True when *path* starts with ``http://`` or ``https://``."""
        return path.startswith(("http://", "https://"))

    async def _get_image_size(self, image_path: str) -> tuple:
        """Return ``(width, height)`` of an image, or ``(0, 0)`` on any failure.

        Args:
            image_path: An http(s) URL (the body is downloaded with a 10 second
                total timeout) or a path that ``PIL.Image.open`` can open.
        """
        try:
            if self._is_url(image_path):
                timeout = aiohttp.ClientTimeout(total=10)
                async with aiohttp.ClientSession(timeout=timeout) as session:
                    async with session.get(image_path) as resp:
                        if resp.status != 200:
                            return (0, 0)
                        data = await resp.read()
                with Image.open(BytesIO(data)) as img:
                    return img.size
            with Image.open(image_path) as img:
                return img.size
        except Exception as e:
            # Best effort: the size is auxiliary data and must not fail the analysis.
            logger.debug(f"[ImageAnalyzer] 获取尺寸失败: {e}")
            return (0, 0)

    def _evict_expired(self, now: float) -> None:
        """Drop every cache entry whose age at *now* has reached the TTL."""
        expired = [
            key
            for key, (_, stamp) in self._analysis_cache.items()
            if now - stamp >= self._CACHE_TTL_SECONDS
        ]
        for key in expired:
            self._analysis_cache.pop(key, None)

    async def analyze(self, image_url: str) -> Dict[str, Any]:
        """Analyze an image asynchronously and return a structured result.

        Results are cached per URL for ``_CACHE_TTL_SECONDS``; callers always
        receive their own copy of the dict. Fallback results are not cached.

        Args:
            image_url: URL of the image, passed to the vision model as-is.

        Returns:
            A dict with (among others) these keys::

                "url":           image URL
                "complexity":    simple|normal|complex|hard
                "subject":       main subject
                "proc_type":     processing type
                "quality":       quality assessment
                "flatness":      flat|mild|rough
                "has_text":      yes|no|partial
                "has_face":      yes|no
                "has_shadow":    yes|no
                "risk":          none|low|high
                "feasibility":   yes|partial|no
                "perspective":   no|mild|strong
                "aspect_ratio":  aspect ratio
                "gemini_prompt": processing prompt
                "note":          remark
                "price_suggest": suggested price
                "width":         width in pixels (0 if unknown)
                "height":        height in pixels (0 if unknown)
                "analyzed_at":   ISO timestamp of the analysis
                "success":       True, or False for a fallback result

            Successful results additionally carry ``"elapsed"`` (seconds spent
            on the model call).
        """
        if not self.api_key:
            return self._fallback(image_url, "未配置 API Key")

        # Cache lookup.
        cache_key = image_url
        now = time.monotonic()
        cached = self._analysis_cache.get(cache_key)
        if cached:
            result, cached_at = cached
            if now - cached_at < self._CACHE_TTL_SECONDS:
                logger.debug(f"[ImageAnalyzer] 缓存命中: {image_url[:50]}...")
                return dict(result)
            self._analysis_cache.pop(cache_key, None)

        start = time.monotonic()
        try:
            # The context manager closes the client's HTTP connection pool;
            # a client is created per call, so leaving it open would leak.
            async with AsyncOpenAI(base_url=self.base_url, api_key=self.api_key) as client:
                response = await asyncio.wait_for(
                    client.chat.completions.create(
                        model=self.vision_model,
                        messages=[{
                            "role": "user",
                            "content": [
                                {"type": "image_url", "image_url": {"url": image_url}},
                                {"type": "text", "text": ANALYSIS_PROMPT},
                            ],
                        }],
                        max_tokens=500,
                    ),
                    timeout=30,
                )
            content = response.choices[0].message.content
            if not content:
                # An empty reply would otherwise parse into an all-defaults
                # result that claims success.
                logger.warning(f"[ImageAnalyzer] 模型返回内容为空: {image_url[:50]}...")
                return self._fallback(image_url, "模型返回内容为空")
            elapsed = time.monotonic() - start
            result = self._parse_result(image_url, content)
            result["elapsed"] = round(elapsed, 2)

            # Image dimensions.
            w, h = await self._get_image_size(image_url)
            result["width"] = w
            result["height"] = h

            # Cache a private copy, stamped at completion time so request
            # latency does not eat into the TTL; prune stale entries so the
            # cache cannot grow without bound.
            stored_at = time.monotonic()
            self._evict_expired(stored_at)
            self._analysis_cache[cache_key] = (dict(result), stored_at)

            # Detailed log.
            log_parts = [
                f"主体={result.get('subject', '?')}",
                f"类型={result.get('proc_type', '?')}",
                f"复杂度={result.get('complexity', '?')}",
                f"色彩={result.get('color', '?')}",
                f"细节={result.get('detail_level', '?')}",
                f"工时={result.get('est_time', '?')}",
                f"方案={result.get('suggest_method', '?')}",
                f"¥{result.get('price_suggest', 0)}",
            ]
            logger.info(f"[ImageAnalyzer] 分析完成: {' | '.join(log_parts)}")
            if result.get('difficulty'):
                logger.info(f"[ImageAnalyzer] 难点: {result.get('difficulty')}")
            if result.get('gemini_prompt'):
                logger.info(f"[ImageAnalyzer] Gemini提示词: {result.get('gemini_prompt')}")
            return result
        except asyncio.TimeoutError:
            logger.warning(f"[ImageAnalyzer] 分析超时: {image_url[:50]}...")
            return self._fallback(image_url, "分析超时")
        except Exception as e:
            # Top-level boundary: this service must never break the caller.
            logger.warning(f"[ImageAnalyzer] 分析失败: {e}")
            return self._fallback(image_url, str(e))

    def _parse_result(self, url: str, content: str) -> Dict[str, Any]:
        """Parse the vision model's ``label: value`` text into a result dict.

        Args:
            url: Image URL, copied into the result.
            content: Raw model reply, one field per line.

        Returns:
            The structured result with ``success`` set to True. Fields missing
            from (or empty in) the reply take their defaults.
        """
        import re
        from datetime import datetime

        text = content or ""

        def extract(key: str, default: str = "") -> str:
            # The label must sit before the first colon of its own line, so a
            # label quoted inside another field's value is never picked up.
            # Only spaces/tabs are skipped around the colon: skipping "\s"
            # would cross the newline and return the *next* line for an empty
            # field. Both ASCII and full-width colons are accepted.
            pattern = rf"^[^\n::]*?{re.escape(key)}[ \t]*[::][ \t]*(.*)$"
            match = re.search(pattern, text, re.IGNORECASE | re.MULTILINE)
            if not match:
                return default
            return match.group(1).strip() or default

        complexity = extract("复杂度", "normal").lower()
        if complexity not in self.PRICE_MAP:
            complexity = "normal"
        price_min, price_max = self.PRICE_MAP.get(complexity, (15, 20))
        # Round the midpoint half-up to a multiple of 5. Built-in round() uses
        # round-half-to-even, which priced "complex" the same as "normal".
        price_suggest = int((price_min + price_max) / 2 / 5 + 0.5) * 5

        # Surcharge for text.
        has_text = extract("含文字", "no").lower()
        if has_text in ("yes", "partial") and complexity in ("simple", "normal"):
            price_suggest += 5

        # Surcharge for faces.
        has_face = extract("含人脸", "no").lower()
        if has_face == "yes":
            price_suggest += 5

        # Surcharge for fine detail.
        detail_level = extract("细节层级", "中等")
        if detail_level == "极精细":
            price_suggest += 10
        elif detail_level == "精细":
            price_suggest += 5

        return {
            "url": url,
            "complexity": complexity,
            "reason": extract("原因"),
            "subject": extract("主体"),
            "proc_type": extract("类型"),
            "quality": extract("质量"),
            "resolution": extract("分辨率评估", "足够"),
            "flatness": extract("平整度", "mild").lower(),
            "has_text": has_text,
            "text_amount": extract("文字量", "none"),
            "has_face": has_face,
            "face_clarity": extract("人脸清晰度", ""),
            "has_shadow": extract("阴影", "no").lower(),
            "color": extract("色彩", "全彩"),
            "main_color": extract("主色调", ""),
            "detail_level": detail_level,
            "edge_clarity": extract("边缘清晰度", "清晰"),
            "background": extract("背景", "简单"),
            "risk": extract("风险", "none").lower(),
            "feasibility": extract("可做", "yes").lower(),
            "sensitive": extract("敏感内容", "no").lower(),
            "perspective": extract("透视", "no").lower(),
            "aspect_ratio": extract("比例", "1:1"),
            "est_time": extract("预估工时", "15分钟"),
            "difficulty": extract("难点", ""),
            "suggest_method": extract("建议方案", "AI处理"),
            "gemini_prompt": extract("提示词"),
            "note": extract("备注"),
            "price_min": price_min,
            "price_max": price_max,
            "price_suggest": price_suggest,
            "analyzed_at": datetime.now().isoformat(),
            "success": True,
        }

    def _fallback(self, url: str, reason: str) -> Dict[str, Any]:
        """Return the default result used when analysis fails.

        Args:
            url: Image URL, copied into the result.
            reason: Why the analysis failed; stored under ``"reason"``.

        Returns:
            A result dict with neutral defaults and ``success`` set to False.
        """
        from datetime import datetime

        return {
            "url": url,
            "complexity": "normal",
            "reason": reason,
            "subject": "",
            "proc_type": "",
            "quality": "",
            "resolution": "",
            "flatness": "",
            "has_text": "no",
            "text_amount": "none",
            "has_face": "no",
            "face_clarity": "",
            "has_shadow": "no",
            "color": "",
            "main_color": "",
            "detail_level": "中等",
            "edge_clarity": "",
            "background": "",
            "risk": "none",
            "feasibility": "yes",
            "sensitive": "no",
            "perspective": "no",
            "aspect_ratio": "1:1",
            "est_time": "",
            "difficulty": "",
            "suggest_method": "",
            "gemini_prompt": "",
            "note": "",
            "price_min": 15,
            "price_max": 20,
            "price_suggest": 20,
            "width": 0,
            "height": 0,
            "analyzed_at": datetime.now().isoformat(),
            "success": False,
        }
# Global instance, created when this module is imported.
image_analyzer_service = ImageAnalyzerService()