feat: 65个PS操作工具 + ai_chat拆分 + 开发准则更新 - ps-operations.ts 1361行(图层/变换/选区/蒙版/调色/文字/文档管理) - ai_llm.py 模型调用层独立 - ai_tools.py 65个function calling schema - aiToolExecutor.ts 全量映射 - 开发准则新增AI模块规范

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
2026-02-07 18:52:34 +08:00
parent dae906aba7
commit 8688422578
22 changed files with 1014 additions and 574 deletions

View File

@@ -15,6 +15,12 @@ from app.db import get_db
from app.models.user import User
from app.models.chat import ChatSession, ChatMessage
from app.api.v1.ai_tools import PS_TOOLS, TOOL_DISPLAY_NAMES
from app.api.v1.ai_llm import (
SYSTEM_PROMPT, VISION_PROMPT,
is_gemini_model, call_llm_with_tools, call_gemini_with_tools,
call_vision_llm, call_gemini, call_image_model,
verify_pattern_result, mock_reply,
)
log = logging.getLogger(__name__)
# 防止 uvicorn --reload 导致 handler 重复
@@ -61,65 +67,6 @@ class VerifyResultRequest(BaseModel):
prompt: Optional[str] = None
vision_model: Optional[str] = None
# ==================== System Prompt ====================
# System prompt for the tool-calling chat path. It is prepended as the
# "system" message by _call_llm_with_tools and _call_gemini_with_tools.
# The text is sent to the model verbatim (in Chinese): it describes the
# assistant's role inside the Photoshop CEP plugin and the two-stage
# "AI pattern fitting" workflow (identify pieces + preview, then apply).
# NOTE(review): the first "重要" section tells the model to wait for user
# confirmation after stage 1, while the last bullet of "重要规则" tells it to
# run the whole flow without pausing — confirm which behavior is intended.
SYSTEM_PROMPT = """你是 DesignerCEP 的 AI 助手,运行在 Adobe Photoshop CEP 插件中。
你的能力:
1. 回答关于 Photoshop 操作和插件使用的问题
2. 通过工具直接操作 Photoshop创建图层、对齐、查看文档信息等
3. 帮用户排查操作中遇到的错误
4. **AI 智能套图**:两阶段流程 — 先生成预览确认,再提取套到裁片上
## AI 智能套图流程(两阶段)
当用户上传成衣图片并要求套图时:
### 阶段 1 — 识别裁片 + 生成预览(需用户确认)
1. 调用 **identify_pieces** — 截取画布并识别每个图层是什么裁片部位(前片、后片、袖子等)
2. 告诉用户识别结果(如 "M-1=前片, M-2=后片, M-5=左袖..."
3. 调用 **generate_garment_preview** — 会自动在裁片下方标注名称标签(如 "M-1 前片"),然后截取带标签的画布 + 成衣照片一起发给 AI 生成预览
4. 预览图显示在聊天中,每个裁片都有标签,**等用户确认 OK 后**进入阶段 2
### 阶段 2 — 提取花样 + 正式套图(用户确认后)
5. 根据用户反馈决定每个裁片的处理方式:
- 需要花样的裁片 → extract_and_apply_all_pieces 中不设 color 字段
- 纯色的裁片(如后幅) → 设置 color 字段为对应颜色值(如 "#F5E6D0"),直接 PS 填充不调 AI
6. 调用 **extract_and_apply_all_pieces** 执行套图
7. 可选:调用 **verify_pattern_result** 验证效果
重要:
- 阶段 1 完成后必须**等用户说"可以"/"OK"**才执行阶段 2
- 用户可能会说"袖子纯色""后幅不要花样"等,要根据 identify_pieces 的结果对应到正确的图层名
- 纯色裁片用 color 字段直接填充,不要浪费 AI 提取调用
重要规则:
- 当用户要求执行 PS 操作时,使用工具完成
- 执行操作前可以先了解当前文档和图层状态
- 用简洁的中文回答,适合在小面板中阅读
- 如果工具执行失败,向用户解释原因并建议解决方案
- 套图时应一次性完成「生成 → 套图 → 验证」全流程,不要中途停下等待用户指令
"""
# System prompt for image analysis. _call_vision_llm sends it as the
# "system" message on both the Gemini and the OpenAI-compatible route.
# The text is sent verbatim (in Chinese) and asks the model to analyse a
# garment photo along seven numbered dimensions, followed by answer rules.
VISION_PROMPT = """你是一位资深的服装设计分析师,同时也是 DesignerCEP 的 AI 助手。
当用户发送服装/成衣图片时,请从以下维度进行专业分析:
1. **服装类别** — 上衣/裤子/裙子/连衣裙/外套/配饰等,细分款式
2. **面料分析** — 根据视觉特征推测面料类型(棉、涤纶、丝绸、针织、牛仔、雪纺等),分析面料质感
3. **颜色与印花** — 主色调、配色方案、印花/图案类型及工艺(数码印花、丝网印刷、提花等)
4. **版型特点** — 修身/宽松/A字/H型等分析领口、袖型、肩线、腰线、下摆处理
5. **工艺细节** — 缝线工艺、拉链/纽扣/暗扣、口袋设计、装饰细节、包边/锁边
6. **设计评价** — 设计亮点、风格定位(休闲/正装/运动/时尚等)、目标消费群体
7. **改进建议** — 如有可改进之处,给出专业建议
规则:
- 如果图片不是服装相关,也请尽力分析图片内容并给出有价值的反馈
- 如果用户同时提了文字问题,请结合图片和问题一起回答
- 用清晰、结构化的中文回答,适合在设计工作中参考
- 回答要专业但不啰嗦,突出重点信息
"""
# ==================== 对话管理接口 ====================
@router.get("/ai/models")
@@ -274,8 +221,8 @@ async def chat(
# 4. 调用 LLM(统一走工具模型,图片信息通过文本标记传递)
try:
if not settings.AI_API_KEY and not (data.model and _is_gemini_model(data.model)):
reply_content = _mock_reply(data.message, has_image)
if not settings.AI_API_KEY and not (data.model and is_gemini_model(data.model)):
reply_content = mock_reply(data.message, has_image)
tool_calls_data = None
else:
call_history = history_list
@@ -287,11 +234,11 @@ async def chat(
}]
# 根据模型类型路由
if data.model and _is_gemini_model(data.model):
if data.model and is_gemini_model(data.model):
log.info(f"[Chat] 路由到 Gemini: {data.model}")
reply_content, tool_calls_data = _call_gemini_with_tools(call_history, data.model)
reply_content, tool_calls_data = call_gemini_with_tools(call_history, data.model)
else:
reply_content, tool_calls_data = _call_llm_with_tools(call_history, model_override=data.model)
reply_content, tool_calls_data = call_llm_with_tools(call_history, model_override=data.model)
except Exception as e:
reply_content = f"AI 请求出错: {str(e)}"
tool_calls_data = None
@@ -348,15 +295,15 @@ async def submit_tool_result(
# 3. 再次调用 LLM(这次不带 tools,让 AI 总结结果)
try:
if not settings.AI_API_KEY and not (data.model and _is_gemini_model(data.model)):
if not settings.AI_API_KEY and not (data.model and is_gemini_model(data.model)):
reply_content = f"工具 {data.tool_name} 执行完成。"
tool_calls_data = None
else:
if data.model and _is_gemini_model(data.model):
if data.model and is_gemini_model(data.model):
log.info(f"[ToolResult] 路由到 Gemini: {data.model}")
reply_content, tool_calls_data = _call_gemini_with_tools(history_list, data.model)
reply_content, tool_calls_data = call_gemini_with_tools(history_list, data.model)
else:
reply_content, tool_calls_data = _call_llm_with_tools(history_list, model_override=data.model)
reply_content, tool_calls_data = call_llm_with_tools(history_list, model_override=data.model)
except Exception as e:
reply_content = f"AI 总结出错: {str(e)}"
tool_calls_data = None
@@ -379,235 +326,6 @@ async def submit_tool_result(
}
# ==================== LLM 调用 ====================
def _call_llm_with_tools(messages_history: List[dict], model_override: str = None):
    """Call the OpenAI-compatible chat model with function calling enabled.

    ``SYSTEM_PROMPT`` is prepended to ``messages_history`` and the request is
    sent with ``PS_TOOLS`` and ``tool_choice="auto"``.

    Args:
        messages_history: Prior conversation as OpenAI-style message dicts.
        model_override: Model name used instead of ``settings.AI_MODEL``.

    Returns:
        A ``(content, tool_calls_data)`` tuple. ``content`` is the reply text
        (``""`` when the model returned none). ``tool_calls_data`` is a list
        of dicts with ``id``, ``name``, ``display_name``, ``args`` and
        ``status="pending"``, or ``None`` when no tool was requested.
    """
    from openai import OpenAI

    use_model = model_override or settings.AI_MODEL
    client = OpenAI(
        api_key=settings.AI_API_KEY,
        base_url=settings.AI_BASE_URL or "https://api.openai.com/v1",
    )
    messages = [{"role": "system", "content": SYSTEM_PROMPT}] + messages_history

    # ---- log: request details ----
    log.info(f"{'='*60}")
    log.info(f"[LLM] 调用工具模型: {use_model}{' (override)' if model_override else ''}")
    log.info(f"[LLM] 消息数量: {len(messages)} (system + {len(messages_history)} history)")
    recent = messages_history[-5:]  # only the 5 most recent entries are logged
    for i, m in enumerate(recent):
        role = m['role']
        content = m['content'][:120] if m.get('content') else '(empty)'
        # i - len(recent) is the entry's real negative index in the history;
        # the previous label was only correct when the history had <= 5 items.
        log.info(f"[LLM] history[{i - len(recent)}] {role}: {content}")
    log.info(f"[LLM] 工具数量: {len(PS_TOOLS)}")

    completion = client.chat.completions.create(
        model=use_model,
        messages=messages,
        tools=PS_TOOLS,
        tool_choice="auto",
    )
    choice = completion.choices[0]
    message = choice.message

    # ---- log: response details ----
    log.info(f"[LLM] 响应 finish_reason={choice.finish_reason}")
    if message.content:
        log.info(f"[LLM] 回复文本: {message.content[:150]}...")
    if message.tool_calls:
        for tc in message.tool_calls:
            # arguments may be None/empty; never let logging raise on it
            log.info(f"[LLM] 工具调用: {tc.function.name}({(tc.function.arguments or '')[:200]})")
    else:
        log.info(f"[LLM] 无工具调用")
    log.info(f"{'='*60}")

    # Plain text reply: no tool was requested.
    if not message.tool_calls:
        return message.content or "", None

    tool_calls_data = []
    for tc in message.tool_calls:
        args = {}
        if tc.function.arguments:
            try:
                args = json.loads(tc.function.arguments)
            except json.JSONDecodeError as e:
                # Keep the best-effort fallback to empty args, but leave a trace.
                log.warning(f"[LLM] 工具参数解析失败: {tc.function.name}: {e}")
                args = {}
        tool_calls_data.append({
            "id": tc.id,
            "name": tc.function.name,
            "display_name": TOOL_DISPLAY_NAMES.get(tc.function.name, tc.function.name),
            "args": args,
            "status": "pending"
        })
    return message.content or "", tool_calls_data
# ==================== Gemini 调用 ====================
def _is_gemini_model(model_name: str) -> bool:
"""判断是否是 Gemini 模型"""
return model_name and "gemini" in model_name.lower()
def _call_gemini(messages_history: List[dict], model: str, images_b64: List[str] = None) -> str:
    """Send a chat request to Gemini through an OpenAI-compatible endpoint.

    The client is pointed at ``{settings.GEMINI_BASE_URL}/v1``. Supports plain
    text conversations and, when ``images_b64`` is given, image input: the
    images are attached to the last ``user`` message.

    Args:
        messages_history: Message dicts; roles other than system / user /
            assistant are sent as ``user``.
        model: Model name passed to the endpoint.
        images_b64: Optional base64 image payloads (sent as JPEG data URLs).

    Returns:
        The reply text, or ``""`` when the response has no content.

    Raises:
        ValueError: If ``GEMINI_API_KEY`` or ``GEMINI_BASE_URL`` is not set.
    """
    from openai import OpenAI as _OpenAI

    if not settings.GEMINI_API_KEY or not settings.GEMINI_BASE_URL:
        raise ValueError("GEMINI_API_KEY 或 GEMINI_BASE_URL 未配置")

    client = _OpenAI(
        api_key=settings.GEMINI_API_KEY,
        base_url=f"{settings.GEMINI_BASE_URL}/v1",
    )

    # Rebuild the history as fresh OpenAI-format dicts, coercing unknown roles.
    allowed_roles = ("system", "user", "assistant")
    messages = []
    for entry in messages_history:
        entry_role = entry.get("role", "user")
        messages.append({
            "role": entry_role if entry_role in allowed_roles else "user",
            "content": entry.get("content", ""),
        })

    # With images, turn the most recent user message into a multimodal one:
    # image parts first, the original text last.
    if images_b64:
        user_positions = [pos for pos, item in enumerate(messages) if item["role"] == "user"]
        if user_positions:
            target = messages[user_positions[-1]]
            parts = [
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/jpeg;base64,{encoded}"}
                }
                for encoded in images_b64
            ]
            parts.append({"type": "text", "text": target["content"]})
            target["content"] = parts

    image_count = len(images_b64) if images_b64 else 0
    log.info(f"{'='*60}")
    log.info(f"[Gemini] 调用模型: {model} (OpenAI 兼容)")
    log.info(f"[Gemini] 消息数: {len(messages)}, 图片数: {image_count}")

    completion = client.chat.completions.create(
        model=model,
        messages=messages,
    )
    result = completion.choices[0].message.content or ""
    log.info(f"[Gemini] 回复: {result[:200]}...")
    return result
def _call_gemini_with_tools(messages_history: List[dict], model: str) -> tuple:
    """Run a Gemini text conversation with the tool-oriented system prompt.

    No tool schema is sent on this path: ``SYSTEM_PROMPT`` is prepended and
    the conversation is forwarded to ``_call_gemini`` as plain chat.

    Args:
        messages_history: Prior conversation as message dicts.
        model: Gemini model name.

    Returns:
        ``(content, None)`` — the second element is always ``None`` because
        this path never produces tool calls.
    """
    full_history = [{"role": "system", "content": SYSTEM_PROMPT}] + messages_history

    log.info(f"{'='*60}")
    log.info(f"[Gemini] 调用对话模型: {model}")
    recent = messages_history[-3:]  # only the 3 most recent entries are logged
    for i, m in enumerate(recent):
        # i - len(recent) is the entry's real negative index in the history;
        # the previous label was only correct when the history had <= 3 items.
        log.info(f"[Gemini] history[{i - len(recent)}] {m['role']}: {str(m.get('content',''))[:120]}")

    result = _call_gemini(full_history, model)

    log.info(f"[Gemini] 回复文本: {result[:150]}...")
    log.info(f"[Gemini] 无工具调用Gemini 不支持 function calling")
    return result, None
def _call_vision_llm(user_message: str, image_base64: str, history: List[dict], model_override: str = None) -> str:
    """Ask a vision model to analyse one image together with a user message.

    Routes to ``_call_gemini`` when the model name is a Gemini one, otherwise
    to the OpenAI-compatible endpoint configured via ``settings.AI_BASE_URL``.
    Both routes send ``VISION_PROMPT`` as the system message followed by the
    last 10 history entries.

    Args:
        user_message: Text accompanying the image.
        image_base64: Base64 image payload (sent as a JPEG data URL).
        history: Earlier messages; each needs ``role`` and ``content`` keys.
        model_override: Model name used instead of ``settings.AI_VISION_MODEL``.

    Returns:
        The reply text (``""`` when the OpenAI route returns no content).
    """
    use_model = model_override or settings.AI_VISION_MODEL

    def _recent_history():
        # "tool" messages are replayed as "user" messages
        return [
            {
                "role": "user" if past["role"] == "tool" else past["role"],
                "content": past["content"],
            }
            for past in history[-10:]
        ]

    # ---------- Gemini route ----------
    if _is_gemini_model(use_model):
        log.info(f"[Vision] 使用 Gemini 视觉模型: {use_model}")
        gemini_messages = [
            {"role": "system", "content": VISION_PROMPT},
            *_recent_history(),
            {"role": "user", "content": user_message},
        ]
        return _call_gemini(gemini_messages, use_model, images_b64=[image_base64])

    # ---------- Qwen / OpenAI route ----------
    from openai import OpenAI

    client = OpenAI(
        api_key=settings.AI_API_KEY,
        base_url=settings.AI_BASE_URL or "https://api.openai.com/v1",
    )

    # Current turn: image part first, then the text part.
    image_part = {
        "type": "image_url",
        "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"}
    }
    text_part = {
        "type": "text",
        "text": user_message
    }
    messages = [
        {"role": "system", "content": VISION_PROMPT},
        *_recent_history(),
        {"role": "user", "content": [image_part, text_part]},
    ]

    completion = client.chat.completions.create(
        model=use_model,
        messages=messages,
    )
    return completion.choices[0].message.content or ""
def _mock_reply(message: str, has_image: bool = False) -> str:
"""未配置 API Key 时的模拟回复"""
if has_image:
return "【模拟分析】收到图片。AI 分析功能需要配置 AI_API_KEY 和 AI_VISION_MODEL。\n\n配置后可以分析:服装类别、面料、颜色印花、版型、工艺细节等。"
if "套图" in message:
return "套图功能在「参数预设」页面。先选择花样组和裁片组,再添加规则,最后点击生成。"
elif "对齐" in message:
return "图层对齐功能在「参数预设」页面顶部。支持上下左右居中对齐、领口对齐等。"
elif "裁片" in message or "PLT" in message:
return "PLT 裁片处理在「PLT 裁片处理」页面。上传 PLT 文件,选择尺码,点击开始处理。"
else:
return "你好!我是 DesignerCEP AI 助手。你可以问我关于套图、裁片、对齐等功能的问题。"
# ==================== 图案生成 & 验证 ====================
@@ -638,7 +356,7 @@ async def generate_preview(
"请从这件成衣中提取面料花样,生成一张干净的花样平铺图。"
)
result_url, desc = _call_image_model(
result_url, desc = call_image_model(
images_b64=images,
prompt=data.prompt or default_prompt,
model_override=data.image_edit_model,
@@ -755,7 +473,7 @@ async def refine_piece(
log.info(f"[RefinePiece] 提示词: {prompt[:200]}")
result_url, desc = _call_image_model(
result_url, desc = call_image_model(
images_b64=[data.cropped_base64],
prompt=prompt,
model_override=data.image_edit_model,
@@ -844,7 +562,7 @@ async def extract_piece_pattern(
f"将该区域的花样输出为一张完整的矩形图片。"
f"要求:只保留花样内容,去掉轮廓线,填满整个矩形,保持清晰。"
)
result_url, desc = _call_image_model(
result_url, desc = call_image_model(
images_b64=[data.preview_base64],
prompt=prompt,
model_override=getattr(data, 'image_edit_model', None),
@@ -940,14 +658,14 @@ type 只有四种:
只返回 JSON。"""
# ---------- Gemini 路由 ----------
if _is_gemini_model(use_model):
if is_gemini_model(use_model):
log.info(f"[IdentifyPieces] 使用 Gemini 视觉: {use_model}")
images = []
if garment_b64:
images.append(garment_b64)
images.append(canvas_b64)
content = _call_gemini(
content = call_gemini(
[{"role": "user", "content": prompt}],
use_model,
images_b64=images
@@ -1006,265 +724,10 @@ async def verify_result(
raise HTTPException(400, "AI_API_KEY 未配置")
try:
feedback = _verify_pattern_result(data.garment_base64, data.canvas_base64, data.prompt, vision_model=data.vision_model)
feedback = verify_pattern_result(data.garment_base64, data.canvas_base64, data.prompt, vision_model=data.vision_model)
return {"code": 200, "data": {"feedback": feedback}}
except Exception as e:
log.error(f"验证失败: {e}", exc_info=True)
raise HTTPException(500, f"验证失败: {str(e)}")
# ==================== 公共:图片模型调用 + 响应解析 ====================
def _ensure_debug_image_dir():
    """Return the ``debug_images`` directory path, creating it when missing.

    Returns ``None`` (after logging a warning) when the directory cannot be
    created, so that a debug-only failure never aborts a real model call.
    """
    import os

    debug_dir = os.path.join(os.path.dirname(__file__), '..', '..', '..', 'debug_images')
    try:
        os.makedirs(debug_dir, exist_ok=True)
    except OSError as e:
        log.warning(f"[ImageModel] 保存调试图片失败: {e}")
        return None
    return debug_dir


def _call_gemini_image_model(images_b64: List[str], prompt: str, use_model: str) -> tuple:
    """Generate an image via the Gemini OpenAI-compatible endpoint; return ``(data_uri, description)``."""
    import os
    import re
    import time

    log.info(f"[ImageModel] 使用 Gemini 图片模型: {use_model}")
    if not settings.GEMINI_API_KEY or not settings.GEMINI_BASE_URL:
        raise ValueError("GEMINI_API_KEY 或 GEMINI_BASE_URL 未配置")

    from openai import OpenAI as _OpenAI

    # The endpoint is addressed through the OpenAI-compatible /v1 interface.
    client = _OpenAI(
        api_key=settings.GEMINI_API_KEY,
        base_url=f"{settings.GEMINI_BASE_URL}/v1",
    )

    # OpenAI-format multimodal message: prompt text first, then every image.
    content_parts = [{"type": "text", "text": prompt}]
    content_parts.extend(
        {
            "type": "image_url",
            "image_url": {"url": f"data:image/jpeg;base64,{img_b64}"}
        }
        for img_b64 in images_b64
    )

    log.info(f"[ImageModel] Gemini OpenAI 兼容模式")
    log.info(f"[ImageModel] 模型: {use_model}")
    log.info(f"[ImageModel] 图片数: {len(images_b64)}, 各图大小: {[len(b)//1024 for b in images_b64]}KB")
    log.info(f"[ImageModel] 提示词: {prompt[:200]}")

    completion = client.chat.completions.create(
        model=use_model,
        messages=[{"role": "user", "content": content_parts}],
    )
    result_content = completion.choices[0].message.content or ""
    log.info(f"[ImageModel] Gemini 回复长度: {len(result_content)} chars")

    # Preferred form: a markdown image, ![...](data:image/...;base64,...)
    match = re.search(r'!\[.*?\]\((data:image/(\w+);base64,([^)]+))\)', result_content)
    if not match:
        # Fallback: a bare data URI without markdown wrapping.
        match = re.search(r'(data:image/(\w+);base64,([A-Za-z0-9+/=]+))', result_content)
    if not match:
        log.warning(f"[ImageModel] Gemini 响应中无图片前500字: {result_content[:500]}")
        raise ValueError("Gemini 未返回图片,请检查模型是否支持图片生成")

    data_uri = match.group(1)
    img_format = match.group(2)
    image_b64 = match.group(3)
    # Restore missing base64 padding so the payload length is a multiple of 4.
    image_b64 += '=' * (-len(image_b64) % 4)
    log.info(f"[ImageModel] Gemini 返回图片: image/{img_format}, {len(image_b64)//1024}KB")

    # Best-effort debug dump of the generated image.
    debug_dir = _ensure_debug_image_dir()
    if debug_dir is not None:
        ts = int(time.time())
        debug_path = os.path.join(debug_dir, f'{ts}_gemini_output.{img_format}')
        try:
            with open(debug_path, 'wb') as f:
                f.write(base64.b64decode(image_b64))
            log.info(f"[ImageModel] Gemini 输出图片已保存: {debug_path}")
        except Exception as e:
            log.warning(f"[ImageModel] 保存调试图片失败: {e}")

    # Text description = response minus the image. The markdown form is removed
    # by the regex; the bare data-URI fallback is removed by the replace, so
    # the base64 blob never ends up inside the description.
    description = re.sub(r'!\[.*?\]\(data:image/[^)]+\)', '', result_content)
    description = description.replace(data_uri, '').strip()

    # A data URI can be used directly as <img src> by the front end.
    image_data_uri = f"data:image/{img_format};base64,{image_b64}"
    log.info(f"[ImageModel] Gemini 生成完成")
    return image_data_uri, description


def _call_dashscope_image_model(images_b64: List[str], prompt: str, use_model: str) -> tuple:
    """Edit/generate an image via the DashScope native API; return ``(image_url, description)``."""
    import os
    import time

    import requests as http_requests

    # Image-edit models use the native endpoint, not /compatible-mode.
    api_url = "https://dashscope.aliyuncs.com/api/v1/services/aigc/multimodal-generation/generation"

    # DashScope native message format: every image first, then the prompt.
    content_parts = [{"image": f"data:image/jpeg;base64,{img_b64}"} for img_b64 in images_b64]
    content_parts.append({"text": prompt})

    payload = {
        "model": use_model,
        "input": {
            "messages": [{
                "role": "user",
                "content": content_parts
            }]
        },
        "parameters": {
            "n": 1,
            "watermark": False,
            "prompt_extend": True,
        }
    }
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {settings.AI_API_KEY}",
    }

    log.info(f"{'='*60}")
    log.info(f"[ImageModel] 调用 DashScope 原生 API")
    log.info(f"[ImageModel] 模型: {use_model}")
    log.info(f"[ImageModel] 图片数: {len(images_b64)}, 各图大小: {[len(b)//1024 for b in images_b64]}KB")
    log.info(f"[ImageModel] 提示词: {prompt[:200]}")
    log.info(f"[ImageModel] 端点: {api_url}")

    # Best-effort debug dump of the images sent to the model.
    debug_dir = _ensure_debug_image_dir()
    if debug_dir is not None:
        ts = int(time.time())
        for idx, img_b64 in enumerate(images_b64):
            debug_path = os.path.join(debug_dir, f'{ts}_input_{idx}.jpg')
            try:
                with open(debug_path, 'wb') as f:
                    f.write(base64.b64decode(img_b64))
                log.info(f"[ImageModel] 调试图片已保存: {debug_path}")
            except Exception as e:
                log.warning(f"[ImageModel] 保存调试图片失败: {e}")

    resp = http_requests.post(api_url, json=payload, headers=headers, timeout=120)
    if resp.status_code != 200:
        error_data = resp.json() if resp.headers.get("content-type", "").startswith("application/json") else {}
        err_msg = error_data.get("message", resp.text[:300])
        err_code = error_data.get("code", "")
        log.error(f"[ImageModel] API 错误: {resp.status_code} {err_code} {err_msg}")
        if "data_inspection_failed" in str(error_data):
            raise ValueError("图片内容未通过安全审核,请更换图片")
        raise ValueError(f"图片模型调用失败({resp.status_code}): {err_msg}")

    data = resp.json()
    log.info(f"[ImageModel] 响应 keys: {list(data.keys())}")

    # Response layout: output.choices[0].message.content[].image
    output = data.get("output", {})
    choices = output.get("choices", [])
    if not choices:
        log.warning(f"[ImageModel] 无 choices: {str(data)[:500]}")
        raise ValueError("模型未返回结果")

    content_list = choices[0].get("message", {}).get("content", [])
    description = ""
    image_url = None
    for item in content_list:
        if isinstance(item, dict):
            if "image" in item:
                # If several image items are present, the last one wins.
                image_url = item["image"]
                log.info(f"[ImageModel] 获取到图片 URL: {image_url[:100]}...")
            elif "text" in item:
                description += item["text"]

    if not image_url:
        log.warning(f"[ImageModel] content 中无图片: {str(content_list)[:500]}")
        raise ValueError("模型未返回图片")

    # Best-effort debug dump of the output image (downloaded from its URL).
    log.info(f"[ImageModel] 输出图片 URL: {image_url[:120]}...")
    if debug_dir is not None:
        try:
            ts = int(time.time())
            out_resp = http_requests.get(image_url, timeout=60)
            if out_resp.status_code == 200:
                debug_path = os.path.join(debug_dir, f'{ts}_output.png')
                with open(debug_path, 'wb') as f:
                    f.write(out_resp.content)
                log.info(f"[ImageModel] 输出图片已保存: {debug_path}")
        except Exception as e:
            log.warning(f"[ImageModel] 保存输出图片失败: {e}")

    return image_url, description


def _call_image_model(images_b64: List[str], prompt: str, model_override: str = None) -> tuple:
    """Call the image edit/generation model, routing by model name.

    Gemini model names go through the OpenAI-compatible Gemini endpoint; every
    other model goes to the DashScope native API.

    Args:
        images_b64: Base64 image payloads (sent as JPEG data URLs).
        prompt: Instruction text for the model.
        model_override: Model name used instead of
            ``settings.AI_IMAGE_EDIT_MODEL``.

    Returns:
        ``(image, description)``: ``image`` is a ``data:`` URI on the Gemini
        route and the URL returned by the API on the DashScope route;
        ``description`` is any accompanying text.

    Raises:
        ValueError: On missing Gemini configuration, a non-200 DashScope
            response, or a response that contains no image.
    """
    use_model = model_override or settings.AI_IMAGE_EDIT_MODEL
    if _is_gemini_model(use_model):
        return _call_gemini_image_model(images_b64, prompt, use_model)
    return _call_dashscope_image_model(images_b64, prompt, use_model)
def _verify_pattern_result(garment_b64: str, canvas_b64: str, extra_prompt: str = None, vision_model: str = None) -> str:
    """Ask a vision model to compare the garment photo with the fitted canvas.

    Routes to ``_call_gemini`` for Gemini model names, otherwise to the
    OpenAI-compatible endpoint configured via ``settings.AI_BASE_URL``.

    Args:
        garment_b64: Base64 image of the original garment (sent first).
        canvas_b64: Base64 image of the fitting result (sent second).
        extra_prompt: Optional user note appended to the verification prompt.
        vision_model: Model name used instead of ``settings.AI_VISION_MODEL``.

    Returns:
        The model's feedback text (``""`` when the OpenAI route returns no
        content).
    """
    use_model = vision_model or settings.AI_VISION_MODEL
    expert_role = "你是服装套图质量检验专家。"

    log.info(f"{'='*60}")
    log.info(f"[Verify] 调用视觉模型验证套图效果")
    log.info(f"[Verify] 模型: {use_model}")
    log.info(f"[Verify] 成衣图: {len(garment_b64)//1024}KB, 画布图: {len(canvas_b64)//1024}KB")
    if extra_prompt:
        log.info(f"[Verify] 用户补充: {extra_prompt[:100]}")

    prompt_pieces = [
        "请对比这两张图片:第一张是原始成衣照片,第二张是套图结果。\n",
        "验证1. 花样还原度(颜色/比例/方向2. 裁片覆盖完整度 ",
        "3. 对齐质量 4. 整体效果。给出评分(1-10)和具体改进建议。",
    ]
    if extra_prompt:
        prompt_pieces.append(f"\n用户补充:{extra_prompt}")
    verify_prompt = "".join(prompt_pieces)

    # ---------- Gemini route ----------
    if _is_gemini_model(use_model):
        log.info(f"[Verify] 使用 Gemini 视觉: {use_model}")
        # The persona line is sent with the "user" role on this route.
        gemini_messages = [
            {"role": "user", "content": expert_role},
            {"role": "user", "content": verify_prompt},
        ]
        return _call_gemini(
            gemini_messages,
            use_model,
            images_b64=[garment_b64, canvas_b64]
        )

    # ---------- Qwen / OpenAI route ----------
    from openai import OpenAI

    client = OpenAI(
        api_key=settings.AI_API_KEY,
        base_url=settings.AI_BASE_URL or "https://api.openai.com/v1",
    )

    # Both images first (garment, then canvas), the prompt text last.
    user_parts = [
        {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encoded}"}}
        for encoded in (garment_b64, canvas_b64)
    ]
    user_parts.append({"type": "text", "text": verify_prompt})

    completion = client.chat.completions.create(
        model=use_model,
        messages=[
            {"role": "system", "content": expert_role},
            {"role": "user", "content": user_parts},
        ],
    )
    return completion.choices[0].message.content or ""