This commit is contained in:
2026-03-06 12:44:57 +08:00
parent fa61b11b02
commit 006b035de4
132 changed files with 1361 additions and 17400 deletions

View File

@@ -0,0 +1,319 @@
"""
图片分析服务 - 后台静默分析图片,用于数据标定
使用智谱 GLM-4V 视觉模型分析客户发来的图片
"""
import os
import asyncio
import base64
import time
import json
import logging
from typing import Optional, Dict, Any
from openai import AsyncOpenAI
from dotenv import load_dotenv
import aiohttp
from PIL import Image
from io import BytesIO
# Load variables from a local .env file into the environment; the service
# constructor below reads its API key, base URL and model name via os.getenv.
load_dotenv()
# Every log message emitted by this module goes through the "cs_agent" logger.
logger = logging.getLogger("cs_agent")
# Prompt sent to the vision model together with the image. It asks for one
# "<field>: <value>" line per field; ImageAnalyzerService._parse_result looks
# each field up by its exact (Chinese) name, so the names here and the keys
# passed to its extract() helper must stay in sync.
ANALYSIS_PROMPT = """你是一个电商图片处理评估专家。
请仔细分析这张图片,输出以下字段,每行一个,不要多余内容:
敏感内容: <yes|no>
平整度: <flat|mild|rough>
含文字: <yes|no|partial>
文字量: <none|少量|大量>
含人脸: <yes|no>
人脸清晰度: <无|清晰|模糊|遮挡>
阴影: <yes|no>
复杂度: <simple|normal|complex|hard>
原因: <15字以内说明复杂度判断依据>
主体: <图片核心内容,如:印花图案/logo/人物/产品/老照片/风景/文字/其他>
类型: <处理类型,如:印花提取/高清修复/去背景/老照片修复/logo提取/人像修复/其他>
质量: <原图质量,如:清晰/轻微模糊/严重模糊/低分辨率/截图/扫描件>
分辨率评估: <足够|偏低|严重不足>
色彩: <单色|双色|多色|渐变|全彩>
主色调: <白/黑/红/蓝/绿/黄/棕/灰/多彩等>
细节层级: <简约|中等|精细|极精细>
边缘清晰度: <清晰|模糊|毛糙>
背景: <纯色|简单|复杂|透明>
可做: <yes|partial|no>
风险: <none|low|high>
透视: <no|mild|strong>
比例: <从以下选一个最合适的1:1 / 9:16 / 16:9 / 3:4 / 4:3 / 3:2 / 2:3 / 5:4 / 4:5>
预估工时: <5分钟/15分钟/30分钟/1小时/2小时以上>
难点: <具体说明处理难点,如:细节多/透视矫正/文字提取等15字内>
建议方案: <AI处理/人工精修/AI+人工/建议重拍>
提示词: <为 Gemini 写处理指令中文80字以内要详细具体>
备注: <给客服AI的特别提示没有则填无>
判断规则:
- 平整度 flat画面平整、无褶皱、无透视 → 便宜
- 平整度 mild轻微褶皱/透视 → 中等
- 平整度 rough有褶皱/透视/曲面 → 贵
- 含文字 yes有小字要精细保留
- 含人脸 yes有人脸需处理 → 加价
- 敏感内容=yes 时,可做必须填 no
- 细节层级影响工时和价格
"""
class ImageAnalyzerService:
    """Analyze customer images with a vision model, without disturbing the main flow.

    ``analyze`` sends one image plus ``ANALYSIS_PROMPT`` to the configured
    vision model, parses the ``field: value`` reply into a flat dict and
    derives a suggested price from the reported complexity. Model errors and
    timeouts are converted into a default result with ``success=False``
    instead of being raised.
    """

    # Seconds a successful analysis is reused for the same image URL.
    _CACHE_TTL_SECONDS = 300
    # image_url -> (result dict, time.monotonic() when stored). Defined on the
    # class, so the cache is shared by every instance.
    _analysis_cache: Dict[str, tuple] = {}
    # complexity -> (minimum price, maximum price)
    PRICE_MAP = {
        "simple": (10, 15),
        "normal": (15, 20),
        "complex": (20, 25),
        "hard": (25, 30),
    }

    def __init__(self):
        """Read the API key, endpoint and model name from the environment."""
        self.api_key = os.getenv("OPENAI_API_KEY")
        self.base_url = os.getenv("OPENAI_BASE_URL", "https://open.bigmodel.cn/api/paas/v4")
        self.vision_model = os.getenv("VISION_MODEL", "glm-4v-flash")

    def _is_url(self, path: str) -> bool:
        """Return True when *path* starts with an http:// or https:// scheme."""
        return path.startswith(("http://", "https://"))

    async def _get_image_size(self, image_path: str) -> tuple:
        """Return ``(width, height)`` of an image, or ``(0, 0)`` if it cannot be read.

        Args:
            image_path: An http(s) URL or a local file path.

        For URLs the full response body is downloaded (10 second total
        timeout) and opened with Pillow; a non-200 status yields ``(0, 0)``.
        Any exception is logged at debug level and also yields ``(0, 0)``.
        """
        try:
            if self._is_url(image_path):
                timeout = aiohttp.ClientTimeout(total=10)
                async with aiohttp.ClientSession(timeout=timeout) as session:
                    async with session.get(image_path) as resp:
                        if resp.status != 200:
                            return (0, 0)
                        data = await resp.read()
                        with Image.open(BytesIO(data)) as img:
                            return img.size
            else:
                with Image.open(image_path) as img:
                    return img.size
        except Exception as e:
            # Size is best-effort metadata; never let it fail the analysis.
            logger.debug(f"[ImageAnalyzer] 获取尺寸失败: {e}")
            return (0, 0)

    def _cache_lookup(self, key: str) -> Optional[Dict[str, Any]]:
        """Return a copy of the fresh cached result for *key*, or None.

        An entry older than ``_CACHE_TTL_SECONDS`` is removed and treated as
        a miss.
        """
        entry = self._analysis_cache.get(key)
        if entry is None:
            return None
        result, stored_at = entry
        if time.monotonic() - stored_at < self._CACHE_TTL_SECONDS:
            # Hand out a copy so callers cannot mutate the cached entry.
            return dict(result)
        self._analysis_cache.pop(key, None)
        return None

    def _cache_store(self, key: str, result: Dict[str, Any]) -> None:
        """Cache a copy of *result* under *key* and drop every expired entry."""
        now = time.monotonic()
        cache = self._analysis_cache
        # Prune on every insert; otherwise URLs that are never requested again
        # would stay in the cache forever.
        expired = [
            k for k, (_, stored_at) in cache.items()
            if now - stored_at >= self._CACHE_TTL_SECONDS
        ]
        for k in expired:
            del cache[k]
        # Store a copy: the caller receives the original dict and may mutate it.
        cache[key] = (dict(result), now)

    async def analyze(self, image_url: str) -> Dict[str, Any]:
        """Analyze one image and return a structured result dict.

        Args:
            image_url: Location of the image. It is passed to the vision
                model as an ``image_url`` and also used as the cache key.

        Returns:
            A dict that always contains the keys produced by ``_fallback``
            (same key set as ``_parse_result`` plus ``width``/``height``),
            among them:

            - ``url``: the image URL
            - ``complexity``: simple|normal|complex|hard
            - ``subject`` / ``proc_type`` / ``quality``: free-text descriptions
            - ``flatness``: flat|mild|rough
            - ``has_text``: yes|no|partial
            - ``has_face`` / ``has_shadow``: yes|no
            - ``risk``: none|low|high
            - ``feasibility``: yes|partial|no
            - ``perspective``: no|mild|strong
            - ``aspect_ratio``, ``gemini_prompt``, ``note``
            - ``price_min`` / ``price_max`` / ``price_suggest``
            - ``width`` / ``height``: pixel size, 0 when unknown
            - ``analyzed_at``: ISO timestamp
            - ``success``: True for a parsed model reply, False for a fallback

            Successful results additionally carry ``elapsed`` (seconds spent
            on the model call, rounded to 2 decimals).

        Missing API key, timeout (30 s), an empty model reply or any other
        exception yields the fallback result rather than raising. Only
        successful results are cached.
        """
        if not self.api_key:
            return self._fallback(image_url, "未配置 API Key")

        cached = self._cache_lookup(image_url)
        if cached is not None:
            logger.debug(f"[ImageAnalyzer] 缓存命中: {image_url[:50]}...")
            return cached

        start = time.monotonic()
        try:
            client = AsyncOpenAI(base_url=self.base_url, api_key=self.api_key)
            response = await asyncio.wait_for(
                client.chat.completions.create(
                    model=self.vision_model,
                    messages=[{
                        "role": "user",
                        "content": [
                            {"type": "image_url", "image_url": {"url": image_url}},
                            {"type": "text", "text": ANALYSIS_PROMPT}
                        ]
                    }],
                    max_tokens=500
                ),
                timeout=30
            )
            content = response.choices[0].message.content
            if not content:
                # An empty/None reply carries no analysis; report it as a
                # failure instead of returning all-default values as success.
                logger.warning("[ImageAnalyzer] 分析失败: 模型返回空内容")
                return self._fallback(image_url, "模型返回空内容")
            elapsed = time.monotonic() - start
            result = self._parse_result(image_url, content)
            result["elapsed"] = round(elapsed, 2)

            # Pixel size is fetched separately; the model reply has no such field.
            w, h = await self._get_image_size(image_url)
            result["width"] = w
            result["height"] = h

            self._cache_store(image_url, result)

            log_parts = [
                f"主体={result.get('subject', '?')}",
                f"类型={result.get('proc_type', '?')}",
                f"复杂度={result.get('complexity', '?')}",
                f"色彩={result.get('color', '?')}",
                f"细节={result.get('detail_level', '?')}",
                f"工时={result.get('est_time', '?')}",
                f"方案={result.get('suggest_method', '?')}",
                f"¥{result.get('price_suggest', 0)}",
            ]
            logger.info(f"[ImageAnalyzer] 分析完成: {' | '.join(log_parts)}")
            if result.get('difficulty'):
                logger.info(f"[ImageAnalyzer] 难点: {result.get('difficulty')}")
            if result.get('gemini_prompt'):
                logger.info(f"[ImageAnalyzer] Gemini提示词: {result.get('gemini_prompt')}")
            return result
        except asyncio.TimeoutError:
            logger.warning(f"[ImageAnalyzer] 分析超时: {image_url[:50]}...")
            return self._fallback(image_url, "分析超时")
        except Exception as e:
            logger.warning(f"[ImageAnalyzer] 分析失败: {e}")
            return self._fallback(image_url, str(e))

    def _parse_result(self, url: str, content: str) -> Dict[str, Any]:
        """Parse the model's ``field: value`` reply into a result dict.

        Args:
            url: Image URL, copied into the result.
            content: Raw text returned by the vision model. ``None`` or an
                empty string is treated as a reply with no fields.

        Returns:
            A dict with one entry per prompt field (defaults applied for
            missing or empty fields), the price range for the complexity,
            the suggested price, ``analyzed_at`` and ``success=True``.
            ``width``/``height`` are not set here.
        """
        import re
        from datetime import datetime

        text = content or ""

        def extract(key: str, default: str = "") -> str:
            # Accept both ASCII ":" and full-width ":" after the field name,
            # and only read the value from the SAME line: "\s*" would cross
            # the newline and swallow the next field when a value is empty.
            pattern = rf"{re.escape(key)}[ \t]*[::][ \t]*([^\r\n]*)"
            match = re.search(pattern, text, re.IGNORECASE)
            if not match:
                return default
            return match.group(1).strip() or default

        complexity = extract("复杂度", "normal").lower()
        if complexity not in ("simple", "normal", "complex", "hard"):
            complexity = "normal"
        price_min, price_max = self.PRICE_MAP.get(complexity, (15, 20))
        # Midpoint of the range, rounded to a multiple of 5.
        # NOTE(review): round() rounds halves to even, so the base prices are
        # simple=10, normal=20, complex=20, hard=30 — confirm that normal and
        # complex sharing the same base price is intended.
        price_suggest = round((price_min + price_max) / 2 / 5) * 5

        # Text surcharge, applied only to the two cheaper complexity tiers.
        has_text = extract("含文字", "no").lower()
        if has_text in ("yes", "partial") and complexity in ("simple", "normal"):
            price_suggest += 5

        # Face surcharge.
        has_face = extract("含人脸", "no").lower()
        if has_face == "yes":
            price_suggest += 5

        # Detail-level surcharge.
        detail_level = extract("细节层级", "中等")
        if detail_level == "极精细":
            price_suggest += 10
        elif detail_level == "精细":
            price_suggest += 5

        return {
            "url": url,
            "complexity": complexity,
            "reason": extract("原因"),
            "subject": extract("主体"),
            "proc_type": extract("类型"),
            "quality": extract("质量"),
            "resolution": extract("分辨率评估", "足够"),
            "flatness": extract("平整度", "mild").lower(),
            "has_text": has_text,
            "text_amount": extract("文字量", "none"),
            "has_face": has_face,
            "face_clarity": extract("人脸清晰度", ""),
            "has_shadow": extract("阴影", "no").lower(),
            "color": extract("色彩", "全彩"),
            "main_color": extract("主色调", ""),
            "detail_level": detail_level,
            "edge_clarity": extract("边缘清晰度", "清晰"),
            "background": extract("背景", "简单"),
            "risk": extract("风险", "none").lower(),
            "feasibility": extract("可做", "yes").lower(),
            "sensitive": extract("敏感内容", "no").lower(),
            "perspective": extract("透视", "no").lower(),
            "aspect_ratio": extract("比例", "1:1"),
            "est_time": extract("预估工时", "15分钟"),
            "difficulty": extract("难点", ""),
            "suggest_method": extract("建议方案", "AI处理"),
            "gemini_prompt": extract("提示词"),
            "note": extract("备注"),
            "price_min": price_min,
            "price_max": price_max,
            "price_suggest": price_suggest,
            "analyzed_at": datetime.now().isoformat(),
            "success": True
        }

    def _fallback(self, url: str, reason: str) -> Dict[str, Any]:
        """Build the default result returned when analysis is not possible.

        Args:
            url: Image URL, copied into the result.
            reason: Why the analysis failed; stored under ``reason``.

        Returns:
            A dict with the same keys as a parsed result plus ``width`` and
            ``height`` (both 0), "normal" complexity pricing and
            ``success=False``.
        """
        from datetime import datetime
        return {
            "url": url,
            "complexity": "normal",
            "reason": reason,
            "subject": "",
            "proc_type": "",
            "quality": "",
            "resolution": "",
            "flatness": "",
            "has_text": "no",
            "text_amount": "none",
            "has_face": "no",
            "face_clarity": "",
            "has_shadow": "no",
            "color": "",
            "main_color": "",
            "detail_level": "中等",
            "edge_clarity": "",
            "background": "",
            "risk": "none",
            "feasibility": "yes",
            "sensitive": "no",
            "perspective": "no",
            "aspect_ratio": "1:1",
            "est_time": "",
            "difficulty": "",
            "suggest_method": "",
            "gemini_prompt": "",
            "note": "",
            "price_min": 15,
            "price_max": 20,
            "price_suggest": 20,
            "width": 0,
            "height": 0,
            "analyzed_at": datetime.now().isoformat(),
            "success": False
        }
# Module-level instance, created at import time.
image_analyzer_service = ImageAnalyzerService()