This commit is contained in:
2026-02-27 16:03:04 +08:00
commit 5aedf1665d
137 changed files with 17604 additions and 0 deletions

593
image/image_analyzer.py Normal file
View File

@@ -0,0 +1,593 @@
"""
图片复杂度识别模块
使用智谱 GLM-4V 视觉模型分析客户发来的图片,
判断处理难度为客服AI提供报价依据。
复杂度等级(越平整越便宜):
simple → 10-15元画面平整、无小字、无人脸、无阴影
normal → 15-20元一般复杂度
complex → 20-25元有褶皱/小字/人脸/阴影)
hard → 25-30元非常复杂
报价维度:平整度、含文字(小字加价)、含人脸、阴影。
同一 URL 5 分钟内复用缓存,节省 API 调用。
"""
import os
import asyncio
import base64
import time
from typing import Optional, Tuple
from openai import AsyncOpenAI
from dotenv import load_dotenv
from PIL import Image
import aiohttp
load_dotenv()
ANALYSIS_PROMPT = """你是一个电商图片处理评估专家,同时也是 Gemini 图像生成提示词专家。
请仔细分析这张图片,输出以下字段,每行一个,不要多余内容:
敏感内容: <yes|no>
平整度: <flat|mild|rough>
含文字: <yes|no>
含人脸: <yes|no>
阴影: <yes|no>
复杂度: <simple|normal|complex|hard>
原因: <15字以内说明复杂度判断依据>
主体: <图片核心内容,如:印花图案/logo/人物/产品/老照片/风景/文字/其他>
类型: <处理类型,如:印花提取/高清修复/去背景/老照片修复/logo提取/人像修复/其他>
质量: <原图质量,如:清晰/轻微模糊/严重模糊/低分辨率/截图/扫描件>
可做: <yes|partial|no>
风险: <none|low|high>
透视: <no|mild|strong>
比例: <从以下选一个最合适的1:1 / 9:16 / 16:9 / 3:4 / 4:3 / 3:2 / 2:3 / 5:4 / 4:5>
提示词: <为 Gemini 写处理指令中文60字以内说明要做什么、保留什么、去掉什么>
备注: <给客服AI的特别提示没有则填无>
判断规则:
【报价核心:越平整越便宜】
- 平整度 flat画面平整、无褶皱、无透视 → 便宜
- 平整度 mild轻微褶皱/透视 → 中等
- 平整度 rough有褶皱/透视/曲面 → 贵
- 含文字:大字没关系不加价;小字需精细保留/清晰化 → 加价(含文字填 yes 仅指有小字的情况)
- 含人脸 yes有人脸 → 加价
- 阴影 yes有明显阴影需处理 → 加价
综合以上因素,越平整、无小字、无人脸、无阴影 → 越便宜simple
【含文字】
- yes含小字需精细保留/清晰化(小字难处理 → 加价)
- no无文字或仅有大字大字没关系 → 不加价)
【含人脸】
- yes图中有真实人物面孔人像照/集体照/证件照/老照片等)
- no无人脸或人脸极小不影响主体
【风险评估】
- none印花/图案/logo/风景/产品AI处理效果稳定可直接报价
- low有人脸但清晰度尚可AI修复后人脸相似度70-90%,建议先看效果
- high以下任一情况 → 严重模糊的人脸照片/老照片人像/需要打印/客户问能否找回原图
high情况下可做改为partial备注写明风险话术
【敏感内容】优先判断,若为 yes 则 可做 必填 no
- yes图片含色情/黄色/擦边/裸露/性暗示/大尺度等违规内容
- no无上述敏感内容
【可做判断】
- yes效果有把握可直接处理
- partial能处理但有明显限制人脸变形风险/分辨率极低/严重损坏)
- no无法处理纯黑/纯白/完全损坏/找原始RAW文件/敏感内容)
【风险话术模板(备注字段)】
- 含人脸+需打印AI修复后人脸可能有轻微变化建议先看效果确认再打印
- 严重模糊人脸:这张模糊程度较高,修复后清晰了但人脸可能跟原来有差异
- 找原图:找不到原始文件,只能对现有图片做高清修复处理
- 完全损坏:这张无法处理
【透视判断】
- no正面拍摄无明显变形
- mild轻微透视衣服悬挂/桌面小角度斜拍)
- strong严重透视俯拍/贴墙/大角度倾斜)
【比例选择】
- 印花/图案/logo/正方形 -> 1:1
- 竖屏壁纸/短视频封面 -> 9:16
- 宽屏/横版视频 -> 16:9
- 移动广告/Instagram竖图 -> 4:5
- 竖向人像/海报/证件照 -> 3:4
- 竖向相机照片 -> 2:3
- 接近正方形产品图 -> 5:4
- 横向标准图/风景 -> 4:3
- 横向相机照片/产品实拍 -> 3:2
示例1印花无风险
敏感内容: no
平整度: mild
含文字: no
含人脸: no
阴影: no
复杂度: complex
原因: 印花细节密集颜色层次多
主体: 印花图案
类型: 印花提取
质量: 轻微模糊
可做: yes
风险: none
透视: mild
比例: 1:1
提示词: 提取衣物印花图案去除褶皱和背景杂色补全缺失部分保持颜色细节100%还原,输出干净平面印花图
备注: 无
示例2人像老照片要打印
敏感内容: no
平整度: flat
含文字: no
含人脸: yes
阴影: no
复杂度: hard
原因: 严重模糊人脸细节丢失
主体: 人物照片
类型: 人像修复
质量: 严重模糊
可做: partial
风险: high
透视: no
比例: 3:4
提示词: 对模糊人像进行高清修复,增强细节,保持人物特征不变
备注: AI修复后人脸可能有轻微变化建议先看效果确认满意再用于打印
示例3平整印花最便宜
敏感内容: no
平整度: flat
含文字: no
含人脸: no
阴影: no
复杂度: simple
原因: 画面平整无褶皱无文字无人脸
主体: 印花图案
类型: 印花提取
质量: 清晰
可做: yes
风险: none
透视: no
比例: 1:1
提示词: 提取印花图案,去除背景,输出干净平面图
备注: 无"""
class ImageAnalyzer:
"""图片复杂度分析器"""
# 同一 URL 5 分钟内复用结果,节省 API 调用
_CACHE_TTL_SECONDS = 300
_analysis_cache: dict = {} # url -> (result_dict, timestamp)
PRICE_MAP = {
"simple": (10, 15, "画面简单干净"),
"normal": (15, 20, "一般复杂度"),
"complex": (20, 25, "细节偏多"),
"hard": (25, 30, "非常复杂"),
}
def __init__(self):
self.api_key = os.getenv("OPENAI_API_KEY")
self.base_url = os.getenv("OPENAI_BASE_URL", "https://open.bigmodel.cn/api/paas/v4")
# 视觉模型,智谱 GLM-4V 系列
self.vision_model = os.getenv("VISION_MODEL", "glm-4v-flash")
def _is_url(self, image_path: str) -> bool:
return image_path.startswith("http://") or image_path.startswith("https://")
def _load_image_base64(self, image_path: str) -> Optional[str]:
"""本地图片转 base64"""
try:
with open(image_path, "rb") as f:
return base64.b64encode(f.read()).decode("utf-8")
except Exception as e:
print(f"[ImageAnalyzer] 读取图片失败: {e}")
return None
async def _get_image_size(self, image_path: str) -> Tuple[int, int]:
"""获取图片像素尺寸 (width, height)URL 或 本地路径"""
try:
if self._is_url(image_path):
timeout = aiohttp.ClientTimeout(total=10)
async with aiohttp.ClientSession(timeout=timeout) as session:
async with session.get(image_path) as resp:
if resp.status != 200:
return (0, 0)
data = await resp.read()
from io import BytesIO
with Image.open(BytesIO(data)) as img:
w, h = img.size
return (int(w), int(h))
else:
with Image.open(image_path) as img:
w, h = img.size
return (int(w), int(h))
except Exception as e:
print(f"[ImageAnalyzer] 获取尺寸失败: {e}")
return (0, 0)
# 最短等待时间即使AI极快返回也等这么久看起来像真人在找
MIN_WAIT_SECONDS = 4
async def analyze(self, image_path: str) -> dict:
"""
异步分析图片复杂度(使用火山引擎 /responses 接口)。
实际等待时间 = max(视觉AI响应时间, MIN_WAIT_SECONDS)
Args:
image_path: 图片URL 或 本地路径
Returns:
{
"complexity": "simple|normal|complex|hard",
"reason": "原因描述",
"price_min": 最低报价,
"price_max": 最高报价,
"price_suggest": 建议报价,
"elapsed": 实际耗时秒数,
"success": True/False
}
"""
if not self.api_key:
await asyncio.sleep(self.MIN_WAIT_SECONDS)
return self._fallback("未配置 API Key")
# 缓存:仅对 URL 生效,本地路径不缓存
cache_key = image_path if self._is_url(image_path) else None
if cache_key:
now = time.monotonic()
cached = self._analysis_cache.get(cache_key)
if cached:
result, cached_at = cached
if now - cached_at < self._CACHE_TTL_SECONDS:
print(f"[ImageAnalyzer] 缓存命中 | URL 已分析过,跳过 API 调用")
result = dict(result)
result["elapsed"] = 0
return result
else:
del self._analysis_cache[cache_key]
start = time.monotonic()
try:
# 构建图片内容
if self._is_url(image_path):
image_item = {
"type": "input_image",
"image_url": image_path
}
else:
b64 = self._load_image_base64(image_path)
if not b64:
await asyncio.sleep(self.MIN_WAIT_SECONDS)
return self._fallback("图片读取失败")
image_item = {
"type": "input_image",
"image_url": f"data:image/jpeg;base64,{b64}"
}
# 使用火山引擎官方 SDKAsyncOpenAI + /responses 接口)
client = AsyncOpenAI(
base_url=self.base_url,
api_key=self.api_key,
)
response = await client.responses.create(
model=self.vision_model,
input=[
{
"role": "user",
"content": [
image_item,
{
"type": "input_text",
"text": ANALYSIS_PROMPT
}
]
}
]
)
content = response.output_text
elapsed = time.monotonic() - start
print(f"[ImageAnalyzer] 视觉AI响应耗时: {elapsed:.1f}s")
await self._wait_remaining(elapsed)
result = self._parse_result(content)
result["elapsed"] = elapsed
# 计算尺寸与类型加价
try:
w, h = await self._get_image_size(image_path)
mp = round((w * h) / 1_000_000, 2) if w and h else 0.0
result["width"] = w
result["height"] = h
result["megapixels"] = mp
# 归一化类型
subj = (result.get("subject") or "").lower()
ptype = (result.get("proc_type") or "").lower()
ratio = result.get("aspect_ratio") or "1:1"
category = "general"
# 初步判断
if ("壁纸" in subj) or ("wallpaper" in subj) or ratio in ("9:16", "16:9"):
category = "wallpaper"
elif ("" in subj) or ("" in subj) or ("印花" in subj) or ("fabric" in subj) or ("cloth" in subj) or ("服装" in subj) or ("印花" in ptype):
category = "clothing"
elif ("logo" in subj) or ("logo" in ptype):
category = "logo"
elif ("海报" in subj) or ("poster" in subj):
category = "poster"
elif ("人像" in subj) or ("人物" in subj) or ("portrait" in subj):
category = "portrait"
elif ("产品" in subj) or ("product" in subj):
category = "product"
elif ("老照片" in subj) or ("old photo" in subj):
category = "old_photo"
# 可印花/印刷物体扩展
keywords = subj + " " + ptype
if any(k in keywords for k in ["装饰画", "挂画", "油画", "canvas", "painting"]):
category = "decor_painting"
elif any(k in keywords for k in ["窗帘", "curtain"]):
category = "curtain"
elif any(k in keywords for k in ["地垫", "脚垫", "地毯", "", "mat", "rug"]):
category = "floor_mat"
elif any(k in keywords for k in ["广告牌", "喷绘", "展架", "灯箱", "banner", "billboard"]):
category = "billboard"
elif any(k in keywords for k in ["毯子", "毛毯", "blanket"]):
category = "blanket"
elif any(k in keywords for k in ["桌布", "台布", "tablecloth", "桌旗"]):
category = "tablecloth"
elif any(k in keywords for k in ["书本", "书籍", "封面", "book", "book cover"]):
category = "book"
elif any(k in keywords for k in ["鼠标垫", "mouse pad", "mousepad"]):
category = "mouse_pad"
elif any(k in keywords for k in ["头像", "个人头像", "个人照", "profile", "avatar"]):
category = "avatar"
result["category"] = category
surcharge = 0
size_note = ""
# 按类别设定尺寸要求与加价阈值(单位:百万像素)
if category == "wallpaper":
if h and h < 1920:
size_note = "壁纸高度低于1920px清晰度可能不足"
if mp > 8:
surcharge = 10
elif mp > 3:
surcharge = 5
elif category == "clothing":
if (w and w < 1024) or (h and h < 1024):
size_note = "印花源图边长低于1024px放大后细节可能不足"
if mp > 6:
surcharge = 10
elif mp > 2:
surcharge = 5
elif category in ("poster", "portrait", "product"):
if mp > 12:
surcharge = 10
elif mp > 6:
surcharge = 5
elif category == "logo":
if mp > 6:
surcharge = 5
elif category == "decor_painting":
if (w and w < 1500) or (h and h < 1500):
size_note = "装饰画边长低于1500px打印放大可能不够清晰"
if mp > 12:
surcharge = 10
elif mp > 6:
surcharge = 5
elif category == "curtain":
if (w and w < 1500):
size_note = "窗帘宽度低于1500px印花放大可能不够清晰"
if mp > 16:
surcharge = 10
elif mp > 8:
surcharge = 5
elif category == "floor_mat":
if mp > 12:
surcharge = 10
elif mp > 6:
surcharge = 5
elif category == "billboard":
if (w and w < 2000) or (h and h < 1000):
size_note = "广告牌尺寸较小,建议更高分辨率以保证喷绘清晰"
if mp > 20:
surcharge = 10
elif mp > 10:
surcharge = 5
elif category == "blanket":
if mp > 16:
surcharge = 10
elif mp > 8:
surcharge = 5
elif category == "tablecloth":
if mp > 12:
surcharge = 10
elif mp > 6:
surcharge = 5
elif category == "book":
if (w and w < 800):
size_note = "书本封面宽度低于800px印刷细节可能不足"
if mp > 6:
surcharge = 5
elif category == "mouse_pad":
if (w and w < 1000):
size_note = "鼠标垫源图宽度低于1000px细节可能不足"
if mp > 4:
surcharge = 5
elif category == "avatar":
if (w and w < 800) or (h and h < 800):
size_note = "头像边长低于800px清晰度可能不足"
if mp > 6:
surcharge = 5
else:
if mp > 8:
surcharge = 10
elif mp > 4:
surcharge = 5
# 应用加价保持5的整数倍与 10-30 区间
base = result.get("price_suggest", 20)
adjusted = base + surcharge
adjusted = max(10, min(30, adjusted))
adjusted = round(adjusted / 5) * 5
# 同步范围
result["price_suggest"] = adjusted
result["price_max"] = max(result["price_max"], adjusted)
result["size_surcharge"] = surcharge
result["size_note"] = size_note
except Exception as e:
print(f"[ImageAnalyzer] 尺寸与类型加价计算失败: {e}")
# 写入缓存
if cache_key:
self._analysis_cache[cache_key] = (dict(result), time.monotonic())
# 简单清理:缓存超过 50 条时删最旧的
if len(self._analysis_cache) > 50:
oldest = min(self._analysis_cache.items(), key=lambda x: x[1][1])
del self._analysis_cache[oldest[0]]
return result
except asyncio.TimeoutError:
elapsed = time.monotonic() - start
print(f"[ImageAnalyzer] 请求超时 ({elapsed:.1f}s)")
return self._fallback("请求超时")
except Exception as e:
elapsed = time.monotonic() - start
print(f"[ImageAnalyzer] 分析失败: {e}")
await self._wait_remaining(elapsed)
return self._fallback(str(e))
async def _wait_remaining(self, elapsed: float):
"""补足最短等待时间"""
remaining = self.MIN_WAIT_SECONDS - elapsed
if remaining > 0:
await asyncio.sleep(remaining)
def _parse_line(self, content: str, *keys: str) -> str:
"""从多行文本中提取指定字段值,支持中英文冒号"""
for line in content.strip().split("\n"):
line = line.strip()
for key in keys:
if line.startswith(key):
return line.split(":", 1)[-1].split("", 1)[-1].strip()
return ""
def _parse_result(self, content: str) -> dict:
"""解析模型返回的结果"""
p = self._parse_line
# 复杂度
complexity_raw = p(content, "复杂度:", "复杂度:").lower()
complexity = complexity_raw if complexity_raw in self.PRICE_MAP else "normal"
sensitive = p(content, "敏感内容:", "敏感内容:").lower().strip()
flatness = p(content, "平整度:", "平整度:").lower().strip() # flat|mild|rough
has_text = p(content, "含文字:", "含文字:").lower().strip()
has_face = p(content, "含人脸:", "含人脸:").lower().strip()
has_shadow = p(content, "阴影:", "阴影:").lower().strip()
reason = p(content, "原因:", "原因:")
subject = p(content, "主体:", "主体:")
proc_type = p(content, "类型:", "类型:")
quality = p(content, "质量:", "质量:")
feasibility = p(content, "可做:", "可做:").lower()
risk = p(content, "风险:", "风险:").lower().strip()
perspective = p(content, "透视:", "透视:").lower().strip()
aspect_ratio = p(content, "比例:", "比例:").strip()
gemini_prompt= p(content, "提示词:", "提示词:")
note = p(content, "备注:", "备注:")
if has_face not in ("yes", "no"):
has_face = "no"
if risk not in ("none", "low", "high"):
risk = "none"
if perspective not in ("no", "mild", "strong"):
perspective = "no"
# 校验比例合法性
valid_ratios = {"1:1", "9:16", "16:9", "3:4", "4:3", "3:2", "2:3", "5:4", "4:5"}
if aspect_ratio not in valid_ratios:
aspect_ratio = "1:1" # 默认正方形
price_min, price_max, default_reason = self.PRICE_MAP[complexity]
if not reason:
reason = default_reason
if feasibility not in ("yes", "partial", "no"):
feasibility = "yes"
# 建议报价complex/hard 取固定值simple/normal 取中间且必须为5的整数倍
raw = price_max if complexity in ("complex", "hard") else (price_min + price_max) // 2
price_suggest = round(raw / 5) * 5
if sensitive == "yes":
feasibility = "no"
note = "图片含敏感内容,不接单"
risk_label = {"none": "无风险", "low": "低风险", "high": "高风险"}.get(risk, "")
sens_tag = " | 敏感:是" if sensitive == "yes" else ""
print(f"[ImageAnalyzer] 识别结果: {complexity} | {reason} | 建议报价: {price_suggest}{sens_tag}")
print(f"[ImageAnalyzer] 主体: {subject} | 类型: {proc_type} | 质量: {quality} | 平整度: {flatness} | 含文字: {has_text} | 含人脸: {has_face} | 阴影: {has_shadow} | 风险: {risk_label} | 透视: {perspective} | 比例: {aspect_ratio} | 可做: {feasibility}")
if gemini_prompt:
print(f"[ImageAnalyzer] Gemini提示词: {gemini_prompt}")
if note and note not in ("", ""):
print(f"[ImageAnalyzer] 备注: {note}")
return {
"complexity": complexity,
"reason": reason,
"subject": subject,
"proc_type": proc_type,
"quality": quality,
"flatness": flatness if flatness in ("flat", "mild", "rough") else "",
"has_text": has_text if has_text in ("yes", "no") else "no",
"has_face": has_face, # yes / no
"has_shadow": has_shadow if has_shadow in ("yes", "no") else "no",
"risk": risk, # none / low / high
"feasibility": feasibility,
"perspective": perspective,
"aspect_ratio": aspect_ratio,
"gemini_prompt": gemini_prompt,
"note": note,
"price_min": price_min,
"price_max": price_max,
"price_suggest": price_suggest,
"success": True
}
def _fallback(self, reason: str) -> dict:
"""识别失败时的默认结果(返回 normal让人工判断"""
print(f"[ImageAnalyzer] 识别失败,使用默认值: {reason}")
return {
"complexity": "normal",
"reason": reason,
"subject": "",
"proc_type": "",
"quality": "",
"flatness": "",
"has_text": "no",
"has_face": "no",
"has_shadow": "no",
"risk": "none",
"feasibility": "yes",
"perspective": "no",
"aspect_ratio": "1:1",
"gemini_prompt": "",
"note": "",
"price_min": 20,
"price_max": 30,
"price_suggest": 25,
"success": False
}
# 全局实例
image_analyzer = ImageAnalyzer()