Files
tw/image/image_analyzer.py
2026-02-27 16:03:04 +08:00

594 lines
24 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
图片复杂度识别模块
使用智谱 GLM-4V 视觉模型分析客户发来的图片,
判断处理难度为客服AI提供报价依据。
复杂度等级(越平整越便宜):
simple → 10-15元画面平整、无小字、无人脸、无阴影
normal → 15-20元一般复杂度
complex → 20-25元有褶皱/小字/人脸/阴影)
hard → 25-30元非常复杂
报价维度:平整度、含文字(小字加价)、含人脸、阴影。
同一 URL 5 分钟内复用缓存,节省 API 调用。
"""
import os
import asyncio
import base64
import time
from typing import Optional, Tuple
from openai import AsyncOpenAI
from dotenv import load_dotenv
from PIL import Image
import aiohttp
# Load variables from a local .env file (if any) into the process environment;
# ImageAnalyzer.__init__ reads its configuration through os.getenv.
load_dotenv()
# Prompt sent to the vision model together with the image (see
# ImageAnalyzer.analyze). It asks for one "<label>: <value>" line per field;
# ImageAnalyzer._parse_result reads those lines back by their Chinese labels,
# so the labels used here and the keys used there must stay in sync.
# NOTE(review): some full-width punctuation inside this text looks like it was
# lost (e.g. "flat画面平整", "最合适的1:1") — confirm against the original file.
ANALYSIS_PROMPT = """你是一个电商图片处理评估专家,同时也是 Gemini 图像生成提示词专家。
请仔细分析这张图片,输出以下字段,每行一个,不要多余内容:
敏感内容: <yes|no>
平整度: <flat|mild|rough>
含文字: <yes|no>
含人脸: <yes|no>
阴影: <yes|no>
复杂度: <simple|normal|complex|hard>
原因: <15字以内说明复杂度判断依据>
主体: <图片核心内容,如:印花图案/logo/人物/产品/老照片/风景/文字/其他>
类型: <处理类型,如:印花提取/高清修复/去背景/老照片修复/logo提取/人像修复/其他>
质量: <原图质量,如:清晰/轻微模糊/严重模糊/低分辨率/截图/扫描件>
可做: <yes|partial|no>
风险: <none|low|high>
透视: <no|mild|strong>
比例: <从以下选一个最合适的1:1 / 9:16 / 16:9 / 3:4 / 4:3 / 3:2 / 2:3 / 5:4 / 4:5>
提示词: <为 Gemini 写处理指令中文60字以内说明要做什么、保留什么、去掉什么>
备注: <给客服AI的特别提示没有则填无>
判断规则:
【报价核心:越平整越便宜】
- 平整度 flat画面平整、无褶皱、无透视 → 便宜
- 平整度 mild轻微褶皱/透视 → 中等
- 平整度 rough有褶皱/透视/曲面 → 贵
- 含文字:大字没关系不加价;小字需精细保留/清晰化 → 加价(含文字填 yes 仅指有小字的情况)
- 含人脸 yes有人脸 → 加价
- 阴影 yes有明显阴影需处理 → 加价
综合以上因素,越平整、无小字、无人脸、无阴影 → 越便宜simple
【含文字】
- yes含小字需精细保留/清晰化(小字难处理 → 加价)
- no无文字或仅有大字大字没关系 → 不加价)
【含人脸】
- yes图中有真实人物面孔人像照/集体照/证件照/老照片等)
- no无人脸或人脸极小不影响主体
【风险评估】
- none印花/图案/logo/风景/产品AI处理效果稳定可直接报价
- low有人脸但清晰度尚可AI修复后人脸相似度70-90%,建议先看效果
- high以下任一情况 → 严重模糊的人脸照片/老照片人像/需要打印/客户问能否找回原图
high情况下可做改为partial备注写明风险话术
【敏感内容】优先判断,若为 yes 则 可做 必填 no
- yes图片含色情/黄色/擦边/裸露/性暗示/大尺度等违规内容
- no无上述敏感内容
【可做判断】
- yes效果有把握可直接处理
- partial能处理但有明显限制人脸变形风险/分辨率极低/严重损坏)
- no无法处理纯黑/纯白/完全损坏/找原始RAW文件/敏感内容)
【风险话术模板(备注字段)】
- 含人脸+需打印AI修复后人脸可能有轻微变化建议先看效果确认再打印
- 严重模糊人脸:这张模糊程度较高,修复后清晰了但人脸可能跟原来有差异
- 找原图:找不到原始文件,只能对现有图片做高清修复处理
- 完全损坏:这张无法处理
【透视判断】
- no正面拍摄无明显变形
- mild轻微透视衣服悬挂/桌面小角度斜拍)
- strong严重透视俯拍/贴墙/大角度倾斜)
【比例选择】
- 印花/图案/logo/正方形 -> 1:1
- 竖屏壁纸/短视频封面 -> 9:16
- 宽屏/横版视频 -> 16:9
- 移动广告/Instagram竖图 -> 4:5
- 竖向人像/海报/证件照 -> 3:4
- 竖向相机照片 -> 2:3
- 接近正方形产品图 -> 5:4
- 横向标准图/风景 -> 4:3
- 横向相机照片/产品实拍 -> 3:2
示例1印花无风险
敏感内容: no
平整度: mild
含文字: no
含人脸: no
阴影: no
复杂度: complex
原因: 印花细节密集颜色层次多
主体: 印花图案
类型: 印花提取
质量: 轻微模糊
可做: yes
风险: none
透视: mild
比例: 1:1
提示词: 提取衣物印花图案去除褶皱和背景杂色补全缺失部分保持颜色细节100%还原,输出干净平面印花图
备注: 无
示例2人像老照片要打印
敏感内容: no
平整度: flat
含文字: no
含人脸: yes
阴影: no
复杂度: hard
原因: 严重模糊人脸细节丢失
主体: 人物照片
类型: 人像修复
质量: 严重模糊
可做: partial
风险: high
透视: no
比例: 3:4
提示词: 对模糊人像进行高清修复,增强细节,保持人物特征不变
备注: AI修复后人脸可能有轻微变化建议先看效果确认满意再用于打印
示例3平整印花最便宜
敏感内容: no
平整度: flat
含文字: no
含人脸: no
阴影: no
复杂度: simple
原因: 画面平整无褶皱无文字无人脸
主体: 印花图案
类型: 印花提取
质量: 清晰
可做: yes
风险: none
透视: no
比例: 1:1
提示词: 提取印花图案,去除背景,输出干净平面图
备注: 无"""
class ImageAnalyzer:
    """Analyse an image with a vision model and derive a price quote.

    The model is asked (via ``ANALYSIS_PROMPT``) to answer with one
    ``<label>: <value>`` line per field. Those lines are parsed into a result
    dict, a base price is taken from ``PRICE_MAP`` and a size-based surcharge
    is added depending on the detected image category and pixel count.
    """

    # Results for the same URL are reused for 5 minutes to save API calls.
    _CACHE_TTL_SECONDS = 300
    # url -> (result_dict, monotonic timestamp). Defined on the class, so the
    # cache is shared by every instance.
    _analysis_cache: dict = {}

    # complexity -> (price_min, price_max, default reason text)
    PRICE_MAP = {
        "simple": (10, 15, "画面简单干净"),
        "normal": (15, 20, "一般复杂度"),
        "complex": (20, 25, "细节偏多"),
        "hard": (25, 30, "非常复杂"),
    }

    # Minimum total wait: even when the model answers very quickly the caller
    # is kept waiting this long, so the reply looks like a human looked it up.
    MIN_WAIT_SECONDS = 4

    # Both colon forms the model may put after a field label. The full-width
    # colon is written as an escape so that tools which strip or normalise
    # "ambiguous" Unicode characters cannot silently break the parser.
    _COLONS = (":", "\uff1a")

    # First-pass category rules, evaluated in order; keywords are matched
    # against the lower-cased subject only.
    # NOTE(review): the original "clothing" rule also contained two keyword
    # literals that had degraded to empty strings. ``"" in s`` is always true,
    # which made every non-wallpaper image "clothing"; they are dropped here.
    # They were presumably single-character terms — confirm and re-add.
    _SUBJECT_RULES = (
        ("clothing", ("印花", "fabric", "cloth", "服装")),
        ("logo", ("logo",)),
        ("poster", ("海报", "poster")),
        ("portrait", ("人像", "人物", "portrait")),
        ("product", ("产品", "product")),
        ("old_photo", ("老照片", "old photo")),
    )

    # Second-pass rules for printable objects, matched against
    # "<subject> <proc_type>"; the first hit overrides the first-pass category.
    # NOTE(review): the original "floor_mat" list also contained an empty
    # string (always matching); it is dropped here — confirm the lost keyword.
    _OBJECT_RULES = (
        ("decor_painting", ("装饰画", "挂画", "油画", "canvas", "painting")),
        ("curtain", ("窗帘", "curtain")),
        ("floor_mat", ("地垫", "脚垫", "地毯", "mat", "rug")),
        ("billboard", ("广告牌", "喷绘", "展架", "灯箱", "banner", "billboard")),
        ("blanket", ("毯子", "毛毯", "blanket")),
        ("tablecloth", ("桌布", "台布", "tablecloth", "桌旗")),
        ("book", ("书本", "书籍", "封面", "book", "book cover")),
        ("mouse_pad", ("鼠标垫", "mouse pad", "mousepad")),
        ("avatar", ("头像", "个人头像", "个人照", "profile", "avatar")),
    )

    # category -> (megapixel tiers, min width, min height, low-resolution note)
    # Tiers are (threshold, surcharge) pairs checked in order; the first tier
    # whose threshold is exceeded wins. A minimum of 0 disables that check.
    _SIZE_RULES = {
        "wallpaper": (((8, 10), (3, 5)), 0, 1920, "壁纸高度低于1920px清晰度可能不足"),
        "clothing": (((6, 10), (2, 5)), 1024, 1024, "印花源图边长低于1024px放大后细节可能不足"),
        "poster": (((12, 10), (6, 5)), 0, 0, ""),
        "portrait": (((12, 10), (6, 5)), 0, 0, ""),
        "product": (((12, 10), (6, 5)), 0, 0, ""),
        "logo": (((6, 5),), 0, 0, ""),
        "decor_painting": (((12, 10), (6, 5)), 1500, 1500, "装饰画边长低于1500px打印放大可能不够清晰"),
        "curtain": (((16, 10), (8, 5)), 1500, 0, "窗帘宽度低于1500px印花放大可能不够清晰"),
        "floor_mat": (((12, 10), (6, 5)), 0, 0, ""),
        "billboard": (((20, 10), (10, 5)), 2000, 1000, "广告牌尺寸较小,建议更高分辨率以保证喷绘清晰"),
        "blanket": (((16, 10), (8, 5)), 0, 0, ""),
        "tablecloth": (((12, 10), (6, 5)), 0, 0, ""),
        "book": (((6, 5),), 800, 0, "书本封面宽度低于800px印刷细节可能不足"),
        "mouse_pad": (((4, 5),), 1000, 0, "鼠标垫源图宽度低于1000px细节可能不足"),
        "avatar": (((6, 5),), 800, 800, "头像边长低于800px清晰度可能不足"),
    }
    # Used for every category without its own entry ("general", "old_photo").
    _DEFAULT_SIZE_RULE = (((8, 10), (4, 5)), 0, 0, "")

    def __init__(self):
        """Read API key, endpoint and model name from the environment."""
        self.api_key = os.getenv("OPENAI_API_KEY")
        self.base_url = os.getenv("OPENAI_BASE_URL", "https://open.bigmodel.cn/api/paas/v4")
        # Vision model; defaults to a Zhipu GLM-4V series model.
        self.vision_model = os.getenv("VISION_MODEL", "glm-4v-flash")

    def _is_url(self, image_path: str) -> bool:
        """Return True when ``image_path`` is an http(s) URL."""
        return image_path.startswith(("http://", "https://"))

    def _load_image_base64(self, image_path: str) -> Optional[str]:
        """Read a local image file and return it base64-encoded.

        Returns None (after logging) when the file cannot be read.
        """
        try:
            with open(image_path, "rb") as f:
                return base64.b64encode(f.read()).decode("utf-8")
        except Exception as e:
            print(f"[ImageAnalyzer] 读取图片失败: {e}")
            return None

    async def _get_image_size(self, image_path: str) -> Tuple[int, int]:
        """Return the pixel size ``(width, height)`` of a URL or local image.

        A URL is downloaded in full (10 s total timeout). Any failure,
        including a non-200 response, yields ``(0, 0)``.
        """
        try:
            if self._is_url(image_path):
                timeout = aiohttp.ClientTimeout(total=10)
                async with aiohttp.ClientSession(timeout=timeout) as session:
                    async with session.get(image_path) as resp:
                        if resp.status != 200:
                            return (0, 0)
                        data = await resp.read()
                from io import BytesIO
                with Image.open(BytesIO(data)) as img:
                    w, h = img.size
                return (int(w), int(h))
            with Image.open(image_path) as img:
                w, h = img.size
            return (int(w), int(h))
        except Exception as e:
            print(f"[ImageAnalyzer] 获取尺寸失败: {e}")
            return (0, 0)

    async def analyze(self, image_path: str) -> dict:
        """Analyse one image and return the parsed fields plus a price quote.

        The request goes through the OpenAI SDK's ``responses.create`` call
        against ``self.base_url``. Total time spent is at least
        ``MIN_WAIT_SECONDS`` except on a cache hit or a timeout.

        NOTE(review): the original comments call this the Volcano Engine
        ``/responses`` interface while the default ``base_url`` points at
        Zhipu — confirm that the configured endpoint supports this API.

        Args:
            image_path: Image URL or local file path. Only URLs are cached.

        Returns:
            A dict with the keys produced by ``_parse_result`` (or
            ``_fallback`` on failure, where ``success`` is False). On success
            it also carries ``elapsed`` (seconds spent on the model call, 0 on
            a cache hit) and, when size pricing succeeded, ``width``,
            ``height``, ``megapixels``, ``category``, ``size_surcharge`` and
            ``size_note``.
        """
        if not self.api_key:
            await asyncio.sleep(self.MIN_WAIT_SECONDS)
            return self._fallback("未配置 API Key")

        # Cache applies to URLs only; local paths are never cached.
        cache_key = image_path if self._is_url(image_path) else None
        if cache_key:
            now = time.monotonic()
            cached = self._analysis_cache.get(cache_key)
            if cached:
                cached_result, cached_at = cached
                if now - cached_at < self._CACHE_TTL_SECONDS:
                    print("[ImageAnalyzer] 缓存命中 | URL 已分析过,跳过 API 调用")
                    # Hand out a copy so callers cannot mutate the cached entry.
                    hit = dict(cached_result)
                    hit["elapsed"] = 0
                    return hit
                del self._analysis_cache[cache_key]

        start = time.monotonic()
        try:
            # Build the image part of the request.
            if self._is_url(image_path):
                image_item = {
                    "type": "input_image",
                    "image_url": image_path,
                }
            else:
                b64 = self._load_image_base64(image_path)
                if not b64:
                    await asyncio.sleep(self.MIN_WAIT_SECONDS)
                    return self._fallback("图片读取失败")
                # The data URL always declares image/jpeg, whatever the file is.
                image_item = {
                    "type": "input_image",
                    "image_url": f"data:image/jpeg;base64,{b64}",
                }

            client = AsyncOpenAI(
                base_url=self.base_url,
                api_key=self.api_key,
            )
            response = await client.responses.create(
                model=self.vision_model,
                input=[
                    {
                        "role": "user",
                        "content": [
                            image_item,
                            {
                                "type": "input_text",
                                "text": ANALYSIS_PROMPT,
                            },
                        ],
                    }
                ],
            )
            content = response.output_text
            elapsed = time.monotonic() - start
            print(f"[ImageAnalyzer] 视觉AI响应耗时: {elapsed:.1f}s")
            await self._wait_remaining(elapsed)

            result = self._parse_result(content)
            result["elapsed"] = elapsed

            # Size/category surcharge is best effort: a failure here keeps the
            # base quote instead of discarding the whole analysis.
            try:
                w, h = await self._get_image_size(image_path)
                self._apply_size_pricing(result, w, h)
            except Exception as e:
                print(f"[ImageAnalyzer] 尺寸与类型加价计算失败: {e}")

            if cache_key:
                self._analysis_cache[cache_key] = (dict(result), time.monotonic())
                # Simple cleanup: beyond 50 entries drop the oldest one.
                if len(self._analysis_cache) > 50:
                    oldest = min(self._analysis_cache.items(), key=lambda x: x[1][1])
                    del self._analysis_cache[oldest[0]]
            return result
        except asyncio.TimeoutError:
            elapsed = time.monotonic() - start
            print(f"[ImageAnalyzer] 请求超时 ({elapsed:.1f}s)")
            return self._fallback("请求超时")
        except Exception as e:
            elapsed = time.monotonic() - start
            print(f"[ImageAnalyzer] 分析失败: {e}")
            await self._wait_remaining(elapsed)
            return self._fallback(str(e))

    @classmethod
    def _classify_category(cls, subject: str, proc_type: str, aspect_ratio: str) -> str:
        """Map the model's subject / processing type / ratio to a category.

        Returns one of the category names used by ``_SIZE_RULES``, or
        ``"general"`` / ``"old_photo"`` which use the default size rule.
        """
        subj = (subject or "").lower()
        ptype = (proc_type or "").lower()

        category = "general"
        if "壁纸" in subj or "wallpaper" in subj or aspect_ratio in ("9:16", "16:9"):
            category = "wallpaper"
        elif "印花" in ptype or any(k in subj for k in cls._SUBJECT_RULES[0][1]):
            category = "clothing"
        elif "logo" in ptype or "logo" in subj:
            category = "logo"
        else:
            # Remaining first-pass rules look at the subject only.
            for name, words in cls._SUBJECT_RULES[2:]:
                if any(k in subj for k in words):
                    category = name
                    break

        # Printable-object keywords take precedence over the first pass.
        keywords = subj + " " + ptype
        for name, words in cls._OBJECT_RULES:
            if any(k in keywords for k in words):
                category = name
                break
        return category

    @classmethod
    def _size_pricing(cls, category: str, w: int, h: int, mp: float) -> Tuple[int, str]:
        """Return ``(surcharge, size_note)`` for a category and image size.

        ``size_note`` is non-empty only when a known (non-zero) dimension is
        below the category's minimum.
        """
        tiers, min_w, min_h, low_res_note = cls._SIZE_RULES.get(category, cls._DEFAULT_SIZE_RULE)
        # A dimension of 0 means "unknown" and never triggers the note.
        too_small = (bool(w) and w < min_w) or (bool(h) and h < min_h)
        size_note = low_res_note if too_small else ""
        surcharge = next((fee for threshold, fee in tiers if mp > threshold), 0)
        return surcharge, size_note

    def _apply_size_pricing(self, result: dict, w: int, h: int) -> None:
        """Add size, category and surcharge fields to ``result`` in place.

        The suggested price is raised by the surcharge, clamped to 10-30 and
        kept a multiple of 5; ``price_max`` is raised to match if needed.
        """
        mp = round((w * h) / 1_000_000, 2) if w and h else 0.0
        result["width"] = w
        result["height"] = h
        result["megapixels"] = mp

        category = self._classify_category(
            result.get("subject") or "",
            result.get("proc_type") or "",
            result.get("aspect_ratio") or "1:1",
        )
        result["category"] = category

        surcharge, size_note = self._size_pricing(category, w, h, mp)
        base = result.get("price_suggest", 20)
        adjusted = max(10, min(30, base + surcharge))
        adjusted = round(adjusted / 5) * 5

        result["price_suggest"] = adjusted
        result["price_max"] = max(result["price_max"], adjusted)
        result["size_surcharge"] = surcharge
        result["size_note"] = size_note

    async def _wait_remaining(self, elapsed: float):
        """Sleep for whatever is left of ``MIN_WAIT_SECONDS`` after ``elapsed``."""
        remaining = self.MIN_WAIT_SECONDS - elapsed
        if remaining > 0:
            await asyncio.sleep(remaining)

    def _parse_line(self, content: str, *keys: str) -> str:
        """Extract the value of the first line whose label matches a key.

        A key may be given with or without a trailing colon; the line may use
        an ASCII or a full-width colon after the label. Everything after that
        first colon is returned stripped, so values that themselves contain
        colons (``1:1``) stay intact. Returns ``""`` when nothing matches.
        """
        colon_chars = "".join(self._COLONS)
        for raw_line in content.strip().split("\n"):
            line = raw_line.strip()
            for key in keys:
                label = key.rstrip(colon_chars)
                if not label or not line.startswith(label):
                    continue
                rest = line[len(label):].lstrip()
                if rest and rest[0] in self._COLONS:
                    return rest[1:].strip()
                if label == key:
                    # Key was given without a colon: accept the bare remainder.
                    return rest
        return ""

    def _parse_result(self, content: str) -> dict:
        """Parse the model's answer into a validated result dict.

        Unknown or missing enum values fall back to safe defaults; the price
        range comes from ``PRICE_MAP``. Sensitive content forces
        ``feasibility`` to ``"no"`` and replaces the note.
        """
        p = self._parse_line

        complexity_raw = p(content, "复杂度:").lower()
        complexity = complexity_raw if complexity_raw in self.PRICE_MAP else "normal"
        sensitive = p(content, "敏感内容:").lower().strip()
        flatness = p(content, "平整度:").lower().strip()  # flat|mild|rough
        has_text = p(content, "含文字:").lower().strip()
        has_face = p(content, "含人脸:").lower().strip()
        has_shadow = p(content, "阴影:").lower().strip()
        reason = p(content, "原因:")
        subject = p(content, "主体:")
        proc_type = p(content, "类型:")
        quality = p(content, "质量:")
        feasibility = p(content, "可做:").lower()
        risk = p(content, "风险:").lower().strip()
        perspective = p(content, "透视:").lower().strip()
        aspect_ratio = p(content, "比例:").strip()
        gemini_prompt = p(content, "提示词:")
        note = p(content, "备注:")

        if has_face not in ("yes", "no"):
            has_face = "no"
        if risk not in ("none", "low", "high"):
            risk = "none"
        if perspective not in ("no", "mild", "strong"):
            perspective = "no"

        valid_ratios = {"1:1", "9:16", "16:9", "3:4", "4:3", "3:2", "2:3", "5:4", "4:5"}
        if aspect_ratio not in valid_ratios:
            aspect_ratio = "1:1"  # default to square

        price_min, price_max, default_reason = self.PRICE_MAP[complexity]
        if not reason:
            reason = default_reason
        if feasibility not in ("yes", "partial", "no"):
            feasibility = "yes"

        # complex/hard quote the top of the range; simple/normal the midpoint.
        # Either way the suggestion is rounded to a multiple of 5.
        raw = price_max if complexity in ("complex", "hard") else (price_min + price_max) // 2
        price_suggest = round(raw / 5) * 5

        if sensitive == "yes":
            feasibility = "no"
            note = "图片含敏感内容,不接单"

        risk_label = {"none": "无风险", "low": "低风险", "high": "高风险"}.get(risk, "")
        sens_tag = " | 敏感:是" if sensitive == "yes" else ""
        print(f"[ImageAnalyzer] 识别结果: {complexity} | {reason} | 建议报价: {price_suggest}{sens_tag}")
        print(f"[ImageAnalyzer] 主体: {subject} | 类型: {proc_type} | 质量: {quality} | 平整度: {flatness} | 含文字: {has_text} | 含人脸: {has_face} | 阴影: {has_shadow} | 风险: {risk_label} | 透视: {perspective} | 比例: {aspect_ratio} | 可做: {feasibility}")
        if gemini_prompt:
            print(f"[ImageAnalyzer] Gemini提示词: {gemini_prompt}")
        # The prompt tells the model to answer "无" when there is no note, so
        # that placeholder is not worth logging. The returned value is untouched.
        if note and note not in ("无", ""):
            print(f"[ImageAnalyzer] 备注: {note}")

        return {
            "complexity": complexity,
            "reason": reason,
            "subject": subject,
            "proc_type": proc_type,
            "quality": quality,
            "flatness": flatness if flatness in ("flat", "mild", "rough") else "",
            "has_text": has_text if has_text in ("yes", "no") else "no",
            "has_face": has_face,  # yes / no
            "has_shadow": has_shadow if has_shadow in ("yes", "no") else "no",
            "risk": risk,  # none / low / high
            "feasibility": feasibility,
            "perspective": perspective,
            "aspect_ratio": aspect_ratio,
            "gemini_prompt": gemini_prompt,
            "note": note,
            "price_min": price_min,
            "price_max": price_max,
            "price_suggest": price_suggest,
            "success": True,
        }

    def _fallback(self, reason: str) -> dict:
        """Return the default result used when recognition fails.

        Complexity is reported as ``normal`` with ``success`` False so a human
        can decide. Note that the 20-30 price range here is higher than
        ``PRICE_MAP["normal"]``.
        """
        print(f"[ImageAnalyzer] 识别失败,使用默认值: {reason}")
        return {
            "complexity": "normal",
            "reason": reason,
            "subject": "",
            "proc_type": "",
            "quality": "",
            "flatness": "",
            "has_text": "no",
            "has_face": "no",
            "has_shadow": "no",
            "risk": "none",
            "feasibility": "yes",
            "perspective": "no",
            "aspect_ratio": "1:1",
            "gemini_prompt": "",
            "note": "",
            "price_min": 20,
            "price_max": 30,
            "price_suggest": 25,
            "success": False,
        }
# Module-level shared instance, created at import time.
image_analyzer = ImageAnalyzer()