"""Image analysis service.

Silently analyzes customer images in the background (for data labeling),
using the Zhipu GLM-4V vision model through the OpenAI-compatible client.
"""

import os
import asyncio
import base64
import time
import json
import logging
import re
from datetime import datetime
from io import BytesIO
from typing import Optional, Dict, Any

from openai import AsyncOpenAI
from dotenv import load_dotenv
import aiohttp
from PIL import Image

load_dotenv()

logger = logging.getLogger("cs_agent")

# NOTE: the allowed values listed for each enum-like field must stay in sync
# with what ImageAnalyzerService._parse_result() accepts; e.g. a complexity
# answer outside simple|normal|complex|hard is coerced to "normal".
# Do not add literal "{" / "}" to this text: it is rendered with str.format().
ANALYSIS_PROMPT = """你是一个电商图片处理评估专家。

客户需求如下:
{customer_requirement}

请结合客户需求和图片内容一起判断,不要只看图片本身。
如果客户明确说了“找原图/找图/素材/大图”,类型优先判断为“找原图/素材提取”类;
如果客户明确说了“修复/高清/清晰/放大”,类型优先判断为“高清修复”类。

请仔细分析这张图片,输出以下字段,每行一个,不要多余内容:

敏感内容: <yes|no>
平整度: <flat|mild|rough>
含文字: <yes|partial|no>
文字量: <无文字填 none,否则简述文字多少>
含人脸: <yes|no>
人脸清晰度: <无|清晰|模糊|遮挡>
阴影: <yes|no>
复杂度: <simple|normal|complex|hard>
原因: <15字以内,说明复杂度判断依据>
主体: <图片核心内容,如:印花图案/logo/人物/产品/老照片/风景/文字/其他>
类型: <处理类型,如:印花提取/高清修复/去背景/老照片修复/logo提取/人像修复/其他>
质量: <原图质量,如:清晰/轻微模糊/严重模糊/低分辨率/截图/扫描件>
分辨率评估: <足够|偏低|严重不足>
色彩: <单色|双色|多色|渐变|全彩>
主色调: <白/黑/红/蓝/绿/黄/棕/灰/多彩等>
细节层级: <简约|中等|精细|极精细>
边缘清晰度: <清晰|模糊|毛糙>
背景: <纯色|简单|复杂|透明>
可做: <yes|partial|no>
风险: <none|low|high>
透视: <no|mild|strong>
比例: <从以下选一个最合适的:1:1 / 9:16 / 16:9 / 3:4 / 4:3 / 3:2 / 2:3 / 5:4 / 4:5>
预估工时: <5分钟/15分钟/30分钟/1小时/2小时以上>
难点: <具体说明处理难点,如:细节多/透视矫正/文字提取等,15字内>
建议方案: <推荐的处理方式,如:AI处理>
提示词: <为 Gemini 写处理指令,中文,80字以内,要详细具体>
备注: <给客服AI的特别提示,没有则填无>

判断规则:
- 平整度 flat:画面平整、无褶皱、无透视 → 便宜
- 平整度 mild:轻微褶皱/透视 → 中等
- 平整度 rough:有褶皱/透视/曲面 → 贵
- 含文字 yes:有小字要精细保留
- 含人脸 yes:有人脸需处理 → 加价
- 敏感内容=yes 时,可做必须填 no
- 细节层级影响工时和价格
"""


def _sanitize_title_part(text: str) -> str:
    """Normalize one title fragment.

    Path separators are replaced with underscores, runs of whitespace are
    collapsed to a single space, and the result is cut to 20 characters.
    ``None`` and other falsy inputs yield an empty string.
    """
    value = str(text or "").strip()
    value = value.replace("/", "_").replace("\\", "_")
    value = " ".join(value.split())
    return value[:20]


def _build_title_suggest(subject: str, proc_type: str, customer_requirement: str) -> str:
    """Build a short suggested title for an analyzed image.

    Preference order: "<subject>_<proc_type>" (whichever of the two are
    non-empty), then the customer requirement, then a fixed placeholder.
    """
    parts = [
        part
        for part in (_sanitize_title_part(subject), _sanitize_title_part(proc_type))
        if part
    ]
    if parts:
        return "_".join(parts)

    requirement_part = _sanitize_title_part(customer_requirement)
    if requirement_part:
        return requirement_part
    return "图片识别结果"


def _midpoint_rounded_to_step(low: int, high: int, step: int = 5) -> int:
    """Return the midpoint of ``low``/``high`` rounded half-up to ``step``.

    The built-in ``round()`` rounds halves to the nearest even number, which
    turned 12.5 -> 10 and 22.5 -> 20 and made adjacent price tiers collapse.
    Integer arithmetic keeps the rounding deterministic:
    floor(mid / step + 0.5) == (low + high + step) // (2 * step).
    """
    return ((low + high + step) // (2 * step)) * step


class ImageAnalyzerService:
    """Image analysis service that runs in the background.

    Every failure is converted into a fallback result instead of raising, so
    the main flow is never interrupted by an analysis problem.
    """

    _CACHE_TTL_SECONDS = 300
    _API_TIMEOUT_SECONDS = 30
    _MAX_TOKENS = 500

    # Shared by all instances: "<image_url>|<requirement>" -> (result, stored_at)
    # where stored_at is a time.monotonic() reading.
    _analysis_cache: Dict[str, tuple] = {}

    # complexity -> (min price, max price)
    PRICE_MAP = {
        "simple": (10, 15),
        "normal": (15, 20),
        "complex": (20, 25),
        "hard": (25, 30),
    }

    def __init__(self):
        self.api_key = os.getenv("OPENAI_API_KEY")
        self.base_url = os.getenv("OPENAI_BASE_URL", "https://open.bigmodel.cn/api/paas/v4")
        self.vision_model = os.getenv("VISION_MODEL", "glm-4v-flash")

    def _is_url(self, path: str) -> bool:
        """Return True when ``path`` is an http(s) URL rather than a local path."""
        return path.startswith(("http://", "https://"))

    async def _get_image_size(self, image_path: str) -> tuple:
        """Return ``(width, height)`` of an image, or ``(0, 0)`` on any failure.

        URLs are downloaded (10 second total timeout); anything else is opened
        as a local file. Errors are logged at debug level and never raised.
        """
        try:
            if self._is_url(image_path):
                timeout = aiohttp.ClientTimeout(total=10)
                async with aiohttp.ClientSession(timeout=timeout) as session:
                    async with session.get(image_path) as resp:
                        if resp.status != 200:
                            return (0, 0)
                        data = await resp.read()
                with Image.open(BytesIO(data)) as img:
                    return img.size
            with Image.open(image_path) as img:
                return img.size
        except Exception as e:
            logger.debug(f"[ImageAnalyzer] 获取尺寸失败: {e}")
            return (0, 0)

    def _prune_expired_cache(self, now: float) -> None:
        """Drop every cache entry older than the TTL so the cache cannot grow forever."""
        expired_keys = [
            key
            for key, (_, stored_at) in self._analysis_cache.items()
            if now - stored_at >= self._CACHE_TTL_SECONDS
        ]
        for key in expired_keys:
            self._analysis_cache.pop(key, None)

    async def analyze(self, image_url: str, customer_requirement: str = "") -> Dict[str, Any]:
        """Analyze an image asynchronously and return a structured result.

        Args:
            image_url: URL of the image, passed to the vision model as-is.
            customer_requirement: Optional free-text requirement from the
                customer; it is embedded into the prompt and is part of the
                cache key.

        Returns:
            A dict. Never raises: on a missing API key, empty response,
            timeout or any other error the dict comes from ``_fallback()``
            with ``success`` set to False and ``reason`` holding the cause.
            Main keys:
                url, customer_requirement,
                complexity (simple|normal|complex|hard), reason,
                subject, proc_type, quality, resolution,
                flatness (flat|mild|rough),
                has_text, text_amount, has_face, face_clarity, has_shadow,
                color, main_color, detail_level, edge_clarity, background,
                risk (none|low|high), feasibility (yes|partial|no),
                sensitive, perspective (no|mild|strong), aspect_ratio,
                est_time, difficulty, suggest_method, gemini_prompt,
                title_suggest, note,
                price_min, price_max, price_suggest,
                width, height, analyzed_at, success,
                elapsed (seconds; successful analyses only).
        """
        customer_requirement = str(customer_requirement or "").strip()

        if not self.api_key:
            return self._fallback(image_url, "未配置 API Key", customer_requirement)

        # Cache lookup
        cache_key = f"{image_url}|{customer_requirement}"
        now = time.monotonic()
        cached = self._analysis_cache.get(cache_key)
        if cached:
            cached_result, cached_at = cached
            if now - cached_at < self._CACHE_TTL_SECONDS:
                logger.debug(f"[ImageAnalyzer] 缓存命中: {image_url[:50]}...")
                return dict(cached_result)
            self._analysis_cache.pop(cache_key, None)

        start = time.monotonic()

        try:
            client = AsyncOpenAI(base_url=self.base_url, api_key=self.api_key)
            prompt_text = ANALYSIS_PROMPT.format(
                customer_requirement=customer_requirement or "未提供明确补充需求"
            )

            response = await asyncio.wait_for(
                client.chat.completions.create(
                    model=self.vision_model,
                    messages=[{
                        "role": "user",
                        "content": [
                            {"type": "image_url", "image_url": {"url": image_url}},
                            {"type": "text", "text": prompt_text},
                        ],
                    }],
                    max_tokens=self._MAX_TOKENS,
                ),
                timeout=self._API_TIMEOUT_SECONDS,
            )

            if not response.choices:
                return self._fallback(image_url, "API 返回空 choices", customer_requirement)

            content = response.choices[0].message.content or ""
            elapsed = time.monotonic() - start

            result = self._parse_result(image_url, content)
            result["customer_requirement"] = customer_requirement
            result["title_suggest"] = _build_title_suggest(
                result.get("subject", ""),
                result.get("proc_type", ""),
                customer_requirement,
            )
            result["elapsed"] = round(elapsed, 2)

            # Image dimensions
            w, h = await self._get_image_size(image_url)
            result["width"] = w
            result["height"] = h

            # Cache a copy, so that a caller mutating the returned dict cannot
            # corrupt what later cache hits receive.
            stored_at = time.monotonic()
            self._prune_expired_cache(stored_at)
            self._analysis_cache[cache_key] = (dict(result), stored_at)

            # Detailed log
            log_parts = [
                f"主体={result.get('subject', '?')}",
                f"类型={result.get('proc_type', '?')}",
                f"复杂度={result.get('complexity', '?')}",
                f"色彩={result.get('color', '?')}",
                f"细节={result.get('detail_level', '?')}",
                f"工时={result.get('est_time', '?')}",
                f"方案={result.get('suggest_method', '?')}",
                f"¥{result.get('price_suggest', 0)}",
            ]
            logger.info(f"[ImageAnalyzer] 分析完成: {' | '.join(log_parts)}")
            if result.get('difficulty'):
                logger.info(f"[ImageAnalyzer] 难点: {result.get('difficulty')}")
            if result.get('gemini_prompt'):
                logger.info(f"[ImageAnalyzer] Gemini提示词: {result.get('gemini_prompt')}")

            return result

        except asyncio.TimeoutError:
            logger.warning(f"[ImageAnalyzer] 分析超时: {image_url[:50]}...")
            return self._fallback(image_url, "分析超时", customer_requirement)
        except Exception as e:
            # Deliberate catch-all: this service must never break the main flow.
            logger.warning(f"[ImageAnalyzer] 分析失败: {e}")
            return self._fallback(image_url, str(e), customer_requirement)

    def _parse_result(self, url: str, content: str) -> Dict[str, Any]:
        """Parse the "key: value" text returned by the vision model.

        Missing or empty fields fall back to per-field defaults. An unknown
        complexity is coerced to "normal". The suggested price is the midpoint
        of the complexity's price range rounded half-up to a multiple of 5,
        plus surcharges for text, faces and fine detail.
        """

        def extract(key: str, default: str = "") -> str:
            # Only spaces/tabs may surround the colon: "\s" would also cross
            # the newline, making an empty field swallow the following line.
            # Both ASCII ":" and full-width ":" are accepted.
            pattern = rf"{re.escape(key)}[ \t]*[::][ \t]*(.*)"
            match = re.search(pattern, content, re.IGNORECASE)
            if not match:
                return default
            return match.group(1).strip() or default

        complexity = extract("复杂度", "normal").lower()
        if complexity not in self.PRICE_MAP:
            complexity = "normal"

        price_min, price_max = self.PRICE_MAP[complexity]
        price_suggest = _midpoint_rounded_to_step(price_min, price_max)

        # Text surcharge (only for the two cheaper tiers)
        has_text = extract("含文字", "no").lower()
        if has_text in ("yes", "partial") and complexity in ("simple", "normal"):
            price_suggest += 5

        # Face surcharge
        has_face = extract("含人脸", "no").lower()
        if has_face == "yes":
            price_suggest += 5

        # Detail-level surcharge
        detail_level = extract("细节层级", "中等")
        if detail_level == "极精细":
            price_suggest += 10
        elif detail_level == "精细":
            price_suggest += 5

        # Enforce the prompt rule "sensitive content => not feasible" even
        # when the model forgets to apply it.
        sensitive = extract("敏感内容", "no").lower()
        feasibility = extract("可做", "yes").lower()
        if sensitive == "yes":
            feasibility = "no"

        subject = extract("主体")
        proc_type = extract("类型")

        return {
            "url": url,
            "customer_requirement": "",
            "complexity": complexity,
            "reason": extract("原因"),
            "subject": subject,
            "proc_type": proc_type,
            "quality": extract("质量"),
            "resolution": extract("分辨率评估", "足够"),
            "flatness": extract("平整度", "mild").lower(),
            "has_text": has_text,
            "text_amount": extract("文字量", "none"),
            "has_face": has_face,
            "face_clarity": extract("人脸清晰度", "无"),
            "has_shadow": extract("阴影", "no").lower(),
            "color": extract("色彩", "全彩"),
            "main_color": extract("主色调", ""),
            "detail_level": detail_level,
            "edge_clarity": extract("边缘清晰度", "清晰"),
            "background": extract("背景", "简单"),
            "risk": extract("风险", "none").lower(),
            "feasibility": feasibility,
            "sensitive": sensitive,
            "perspective": extract("透视", "no").lower(),
            "aspect_ratio": extract("比例", "1:1"),
            "est_time": extract("预估工时", "15分钟"),
            "difficulty": extract("难点", ""),
            "suggest_method": extract("建议方案", "AI处理"),
            "gemini_prompt": extract("提示词"),
            "title_suggest": _build_title_suggest(subject, proc_type, ""),
            "note": extract("备注"),
            "price_min": price_min,
            "price_max": price_max,
            "price_suggest": price_suggest,
            "analyzed_at": datetime.now().isoformat(),
            "success": True,
        }

    def _fallback(self, url: str, reason: str, customer_requirement: str = "") -> Dict[str, Any]:
        """Return the default result used when analysis fails.

        Args:
            url: The image URL that was being analyzed.
            reason: Human-readable failure cause, stored under ``reason``.
            customer_requirement: Requirement text to carry into the result,
                so failed analyses keep the same context as successful ones.
        """
        return {
            "url": url,
            "customer_requirement": customer_requirement,
            "complexity": "normal",
            "reason": reason,
            "subject": "",
            "proc_type": "",
            "quality": "",
            "resolution": "",
            "flatness": "",
            "has_text": "no",
            "text_amount": "none",
            "has_face": "no",
            "face_clarity": "无",
            "has_shadow": "no",
            "color": "",
            "main_color": "",
            "detail_level": "中等",
            "edge_clarity": "",
            "background": "",
            "risk": "none",
            "feasibility": "yes",
            "sensitive": "no",
            "perspective": "no",
            "aspect_ratio": "1:1",
            "est_time": "",
            "difficulty": "",
            "suggest_method": "",
            "gemini_prompt": "",
            "title_suggest": "图片识别结果",
            "note": "",
            "price_min": 15,
            "price_max": 20,
            "price_suggest": 20,
            "width": 0,
            "height": 0,
            "analyzed_at": datetime.now().isoformat(),
            "success": False,
        }


# Global instance
image_analyzer_service = ImageAnalyzerService()