feat: 65个PS操作工具 + ai_chat拆分 + 开发准则更新 - ps-operations.ts 1361行(图层/变换/选区/蒙版/调色/文字/文档管理) - ai_llm.py 模型调用层独立 - ai_tools.py 65个function calling schema - aiToolExecutor.ts 全量映射 - 开发准则新增AI模块规范
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
414
Server/app/api/v1/ai_llm.py
Normal file
414
Server/app/api/v1/ai_llm.py
Normal file
@@ -0,0 +1,414 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
AI 模型调用层
|
||||
统一管理所有 LLM / Vision / Image 模型的调用逻辑
|
||||
支持 Qwen (DashScope) 和 Gemini (第三方代理) 两套路由
|
||||
"""
|
||||
|
||||
from typing import List
|
||||
import json, base64, re, logging
|
||||
from app.core.config import settings
|
||||
from app.api.v1.ai_tools import PS_TOOLS, TOOL_DISPLAY_NAMES
|
||||
|
||||
# Module-level logger, named after this module, used by every call helper below.
log = logging.getLogger(__name__)
|
||||
|
||||
# ==================== Prompts ====================
|
||||
|
||||
# System prompt for the chat/tool assistant. It is prepended as the "system"
# message by call_llm_with_tools and call_gemini_with_tools. The text is sent
# to the model at runtime, so it is kept verbatim (in Chinese).
SYSTEM_PROMPT = """你是 DesignerCEP 的 AI 助手,运行在 Adobe Photoshop CEP 插件中。

你的能力:
1. 回答关于 Photoshop 操作和插件使用的问题
2. 通过工具直接操作 Photoshop(创建图层、对齐、查看文档信息等)
3. 帮用户排查操作中遇到的错误
4. **AI 智能套图**:两阶段流程 — 先生成预览确认,再提取套到裁片上

## AI 智能套图流程(两阶段)

当用户上传成衣图片并要求套图时:

### 阶段 1 — 识别裁片 + 生成预览(需用户确认)
1. 调用 **identify_pieces** — 截取画布并识别每个图层是什么裁片部位(前片、后片、袖子等)
2. 告诉用户识别结果(如 "M-1=前片, M-2=后片, M-5=左袖...")
3. 调用 **generate_garment_preview** — 会自动在裁片下方标注名称标签(如 "M-1 前片"),然后截取带标签的画布 + 成衣照片一起发给 AI 生成预览
4. 预览图显示在聊天中,每个裁片都有标签,**等用户确认 OK 后**进入阶段 2

### 阶段 2 — 提取花样 + 正式套图(用户确认后)
5. 根据 identify_pieces 分析结果中每个裁片的 type 决定处理方式:
- solid → 设 color 字段,PS 直接纯色填充
- fill_pattern → AI 提取花型铺满
- theme_pattern → 底层 PS 纯色填充(设 color)+ 上层 AI 提取主题图案(白底+正片叠底)
- mixed_pattern → 底层 AI 提取花型 + 上层 AI 提取主题图案(白底+正片叠底)
6. 调用 **extract_and_apply_all_pieces** 执行套图
7. 可选:调用 **verify_pattern_result** 验证效果

重要:
- 阶段 1 完成后必须**等用户说"可以"/"OK"**才执行阶段 2
- 用户可能会说"袖子纯色"、"后幅不要花样"等,要根据 identify_pieces 的结果对应到正确的图层名

重要规则:
- 当用户要求执行 PS 操作时,使用工具完成
- 执行操作前可以先了解当前文档和图层状态
- 用简洁的中文回答,适合在小面板中阅读
- 如果工具执行失败,向用户解释原因并建议解决方案
"""
|
||||
|
||||
# System prompt for image analysis. call_vision_llm sends it as the "system"
# message on both the Gemini and the OpenAI-compatible route. The text is sent
# to the model at runtime, so it is kept verbatim (in Chinese).
VISION_PROMPT = """你是一位资深的服装设计分析师,同时也是 DesignerCEP 的 AI 助手。

当用户发送服装/成衣图片时,请从以下维度进行专业分析:

1. **服装类别** — 上衣/裤子/裙子/连衣裙/外套/配饰等,细分款式
2. **面料分析** — 根据视觉特征推测面料类型(棉、涤纶、丝绸、针织、牛仔、雪纺等),分析面料质感
3. **颜色与印花** — 主色调、配色方案、印花/图案类型及工艺(数码印花、丝网印刷、提花等)
4. **版型特点** — 修身/宽松/A字/H型等,分析领口、袖型、肩线、腰线、下摆处理
5. **工艺细节** — 缝线工艺、拉链/纽扣/暗扣、口袋设计、装饰细节、包边/锁边
6. **设计评价** — 设计亮点、风格定位(休闲/正装/运动/时尚等)、目标消费群体
7. **改进建议** — 如有可改进之处,给出专业建议

规则:
- 如果图片不是服装相关,也请尽力分析图片内容并给出有价值的反馈
- 如果用户同时提了文字问题,请结合图片和问题一起回答
- 用清晰、结构化的中文回答,适合在设计工作中参考
- 回答要专业但不啰嗦,突出重点信息
"""
|
||||
|
||||
|
||||
# ==================== 路由判断 ====================
|
||||
|
||||
def is_gemini_model(model_name: str) -> bool:
    """Return True when *model_name* names a Gemini model.

    The check is a case-insensitive substring match on ``"gemini"``. An empty
    or ``None`` name is never treated as a Gemini model.
    """
    if not model_name:
        return False
    return model_name.lower().find("gemini") != -1
|
||||
|
||||
|
||||
# ==================== Qwen / OpenAI 兼容调用 ====================
|
||||
|
||||
def call_llm_with_tools(messages_history: List[dict], model_override: str = None):
    """Call the tool-capable chat model and collect any tool calls it requests.

    ``SYSTEM_PROMPT`` is prepended to ``messages_history`` and the request is
    sent through the OpenAI-compatible client with ``PS_TOOLS`` and
    ``tool_choice="auto"``.

    Args:
        messages_history: Prior conversation turns as chat-message dicts.
        model_override: Model name used instead of ``settings.AI_MODEL``.

    Returns:
        A ``(text, tool_calls)`` tuple. ``text`` is the reply content (``""``
        when the model returned none). ``tool_calls`` is ``None`` when no tool
        was requested; otherwise a list of dicts with the keys ``id``,
        ``name``, ``display_name``, ``args`` (always a dict) and ``status``
        (always ``"pending"``).
    """
    from openai import OpenAI

    use_model = model_override or settings.AI_MODEL

    client = OpenAI(
        api_key=settings.AI_API_KEY,
        base_url=settings.AI_BASE_URL or "https://api.openai.com/v1",
    )

    messages = [{"role": "system", "content": SYSTEM_PROMPT}] + messages_history

    log.info(f"{'='*60}")
    log.info(f"[LLM] 调用工具模型: {use_model}{' (override)' if model_override else ''}")
    log.info(f"[LLM] 消息数量: {len(messages)} (system + {len(messages_history)} history)")
    recent = messages_history[-5:]
    for i, m in enumerate(recent):
        role = m.get('role')
        # str() keeps the preview short even when content is a list of
        # multimodal parts (which may embed large base64 payloads).
        content = str(m['content'])[:120] if m.get('content') else '(empty)'
        # Negative index relative to the end of the history: the slice holds
        # len(recent) items, so item i sits at -(len(recent) - i).
        log.info(f"[LLM]   history[-{len(recent) - i}] {role}: {content}")
    log.info(f"[LLM] 工具数量: {len(PS_TOOLS)}")

    completion = client.chat.completions.create(
        model=use_model,
        messages=messages,
        tools=PS_TOOLS,
        tool_choice="auto",
    )

    choice = completion.choices[0]
    message = choice.message

    log.info(f"[LLM] 响应 finish_reason={choice.finish_reason}")
    if message.content:
        log.info(f"[LLM] 回复文本: {message.content[:150]}...")
    if message.tool_calls:
        for tc in message.tool_calls:
            # arguments may be None/empty; logging must not raise on that.
            log.info(f"[LLM] 工具调用: {tc.function.name}({(tc.function.arguments or '')[:200]})")
    else:
        log.info(f"[LLM] 无工具调用")
    log.info(f"{'='*60}")

    if not message.tool_calls:
        return message.content or "", None

    tool_calls_data = []
    for tc in message.tool_calls:
        args = {}
        if tc.function.arguments:
            try:
                parsed = json.loads(tc.function.arguments)
            except json.JSONDecodeError as exc:
                log.warning(f"[LLM] 工具参数 JSON 解析失败 ({tc.function.name}): {exc}")
            else:
                # Only a JSON object is a usable argument set; anything else
                # (null, list, scalar) falls back to no arguments.
                if isinstance(parsed, dict):
                    args = parsed
        tool_calls_data.append({
            "id": tc.id,
            "name": tc.function.name,
            "display_name": TOOL_DISPLAY_NAMES.get(tc.function.name, tc.function.name),
            "args": args,
            "status": "pending",
        })
    return message.content or "", tool_calls_data
|
||||
|
||||
|
||||
# ==================== Gemini 调用(OpenAI 兼容代理) ====================
|
||||
|
||||
def call_gemini(messages_history: List[dict], model: str, images_b64: List[str] = None) -> str:
    """Call Gemini through the third-party proxy using the OpenAI-compatible API.

    Args:
        messages_history: Chat-message dicts. Roles other than ``system``,
            ``user`` and ``assistant`` are sent as ``user``; a missing or
            ``None`` content is sent as an empty string.
        model: Model name passed to the proxy.
        images_b64: Optional base64 images. They are attached, as
            ``image/jpeg`` data URIs placed before the text, to the last
            ``user`` message.

    Returns:
        The reply text, or ``""`` when the model returned no content.

    Raises:
        ValueError: If ``GEMINI_API_KEY`` or ``GEMINI_BASE_URL`` is not set.
    """
    from openai import OpenAI as _OpenAI

    if not settings.GEMINI_API_KEY or not settings.GEMINI_BASE_URL:
        raise ValueError("GEMINI_API_KEY 或 GEMINI_BASE_URL 未配置")

    client = _OpenAI(
        api_key=settings.GEMINI_API_KEY,
        # Tolerate a configured base URL that ends with "/" (avoids "//v1").
        base_url=f"{str(settings.GEMINI_BASE_URL).rstrip('/')}/v1",
    )

    messages = []
    for msg in messages_history:
        role = msg.get("role", "user")
        if role not in ("system", "user", "assistant"):
            role = "user"
        content = msg.get("content")
        if content is None:
            # A present-but-None content would otherwise be forwarded as null.
            content = ""
        messages.append({"role": role, "content": content})

    if images_b64:
        last_user_idx = next(
            (i for i in range(len(messages) - 1, -1, -1) if messages[i]["role"] == "user"),
            None,
        )
        if last_user_idx is None:
            # Previously the images were dropped without any trace.
            log.warning(f"[Gemini] 消息中没有 user 消息,{len(images_b64)} 张图片未被附加")
        else:
            text_content = messages[last_user_idx]["content"]
            multimodal_content = [
                {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{img_b64}"}}
                for img_b64 in images_b64
            ]
            if isinstance(text_content, list):
                # Already a list of content parts: keep them as parts instead
                # of nesting the list inside a text part.
                multimodal_content.extend(text_content)
            else:
                multimodal_content.append({"type": "text", "text": text_content})
            messages[last_user_idx]["content"] = multimodal_content

    log.info(f"{'='*60}")
    log.info(f"[Gemini] 调用模型: {model} (OpenAI 兼容)")
    log.info(f"[Gemini] 消息数: {len(messages)}, 图片数: {len(images_b64) if images_b64 else 0}")

    completion = client.chat.completions.create(model=model, messages=messages)
    result = completion.choices[0].message.content or ""
    log.info(f"[Gemini] 回复: {result[:200]}...")
    return result
|
||||
|
||||
|
||||
def call_gemini_with_tools(messages_history: List[dict], model: str) -> tuple:
    """Run a plain Gemini conversation turn (no function calling on this route).

    ``SYSTEM_PROMPT`` is prepended to the history before delegating to
    ``call_gemini``.

    Args:
        messages_history: Prior conversation turns as chat-message dicts.
        model: Gemini model name.

    Returns:
        ``(text, None)``. The second element is always ``None`` so the result
        has the same shape as ``call_llm_with_tools`` when no tool is requested.
    """
    full_history = [{"role": "system", "content": SYSTEM_PROMPT}] + messages_history

    log.info(f"{'='*60}")
    log.info(f"[Gemini] 调用对话模型: {model}")
    recent = messages_history[-3:]
    for i, m in enumerate(recent):
        # Negative index relative to the end of the history: the slice holds
        # len(recent) items, so item i sits at -(len(recent) - i).
        log.info(f"[Gemini]   history[-{len(recent) - i}] {m.get('role')}: {str(m.get('content',''))[:120]}")

    result = call_gemini(full_history, model)
    log.info(f"[Gemini] 回复文本: {result[:150]}...")
    return result, None
|
||||
|
||||
|
||||
# ==================== 视觉模型 ====================
|
||||
|
||||
def call_vision_llm(user_message: str, image_base64: str, history: List[dict], model_override: str = None) -> str:
    """Analyse an image with the vision model, routing to Gemini or the OpenAI-compatible API.

    Args:
        user_message: Text accompanying the image.
        image_base64: Base64 image, sent as an ``image/jpeg`` data URI.
        history: Prior turns; only the last ten are forwarded, and ``tool``
            turns are relabelled as ``user`` turns.
        model_override: Model name used instead of ``settings.AI_VISION_MODEL``.

    Returns:
        The model's reply text (``""`` when it returned no content).
    """
    chosen_model = model_override or settings.AI_VISION_MODEL

    def _conversation_prefix() -> list:
        # System prompt followed by the ten most recent turns.
        prefix = [{"role": "system", "content": VISION_PROMPT}]
        prefix.extend(
            {"role": "user" if turn["role"] == "tool" else turn["role"], "content": turn["content"]}
            for turn in history[-10:]
        )
        return prefix

    if is_gemini_model(chosen_model):
        log.info(f"[Vision] 使用 Gemini 视觉模型: {chosen_model}")
        gemini_msgs = _conversation_prefix()
        # call_gemini attaches the image to this final user turn.
        gemini_msgs.append({"role": "user", "content": user_message})
        return call_gemini(gemini_msgs, chosen_model, images_b64=[image_base64])

    from openai import OpenAI
    vision_client = OpenAI(
        api_key=settings.AI_API_KEY,
        base_url=settings.AI_BASE_URL or "https://api.openai.com/v1",
    )

    image_then_text = [
        {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"}},
        {"type": "text", "text": user_message},
    ]
    chat_msgs = _conversation_prefix()
    chat_msgs.append({"role": "user", "content": image_then_text})

    reply = vision_client.chat.completions.create(model=chosen_model, messages=chat_msgs)
    return reply.choices[0].message.content or ""
|
||||
|
||||
|
||||
# ==================== 图片编辑/生成模型 ====================
|
||||
|
||||
def call_image_model(images_b64: List[str], prompt: str, model_override: str = None) -> tuple:
    """Run the image edit/generation model and return the produced image.

    Routes to the Gemini OpenAI-compatible proxy when the model name contains
    "gemini", otherwise to the DashScope native multimodal-generation endpoint.

    Args:
        images_b64: Base64 input images, sent as ``image/jpeg`` data URIs.
        prompt: Text instruction for the model.
        model_override: Model name used instead of
            ``settings.AI_IMAGE_EDIT_MODEL``.

    Returns:
        ``(image, description)``. On the Gemini route ``image`` is a
        ``data:image/<fmt>;base64,...`` URI; on the DashScope route it is the
        image value returned by the API (treated as a URL). ``description`` is
        any accompanying text.

    Raises:
        ValueError: Missing Gemini configuration, a non-200 DashScope response
            (including a failed content inspection), or a response that
            contains no image.
    """
    use_model = model_override or settings.AI_IMAGE_EDIT_MODEL
    if is_gemini_model(use_model):
        return _image_via_gemini(images_b64, prompt, use_model)
    return _image_via_dashscope(images_b64, prompt, use_model)


def _image_via_gemini(images_b64: List[str], prompt: str, use_model: str) -> tuple:
    """Generate an image through the Gemini proxy and extract the inline base64 image."""
    log.info(f"[ImageModel] 使用 Gemini 图片模型: {use_model}")
    if not settings.GEMINI_API_KEY or not settings.GEMINI_BASE_URL:
        raise ValueError("GEMINI_API_KEY 或 GEMINI_BASE_URL 未配置")

    from openai import OpenAI as _OpenAI
    client = _OpenAI(
        api_key=settings.GEMINI_API_KEY,
        # Tolerate a configured base URL that ends with "/" (avoids "//v1").
        base_url=f"{str(settings.GEMINI_BASE_URL).rstrip('/')}/v1",
    )

    content_parts = [{"type": "text", "text": prompt}]
    content_parts.extend(
        {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{img_b64}"}}
        for img_b64 in images_b64
    )

    log.info(f"[ImageModel] Gemini OpenAI 兼容, 模型: {use_model}")
    log.info(f"[ImageModel] 图片数: {len(images_b64)}, 各图: {[len(b)//1024 for b in images_b64]}KB")
    log.info(f"[ImageModel] 提示词: {prompt[:200]}")

    completion = client.chat.completions.create(model=use_model, messages=[{"role": "user", "content": content_parts}])
    result_content = completion.choices[0].message.content or ""
    log.info(f"[ImageModel] Gemini 回复长度: {len(result_content)} chars")

    # Prefer a markdown image (![alt](data:...)); fall back to a bare data URI.
    match = re.search(r'!\[.*?\]\((data:image/(\w+);base64,([^)]+))\)', result_content)
    if not match:
        match = re.search(r'(data:image/(\w+);base64,([A-Za-z0-9+/=]+))', result_content)
    if not match:
        log.warning(f"[ImageModel] Gemini 响应中无图片,前500字: {result_content[:500]}")
        raise ValueError("Gemini 未返回图片,请检查模型是否支持图片生成")

    img_format = match.group(2)
    image_b64 = match.group(3)
    # Restore any '=' padding the model dropped so the payload decodes.
    image_b64 += '=' * (-len(image_b64) % 4)

    log.info(f"[ImageModel] Gemini 返回图片: image/{img_format}, {len(image_b64)//1024}KB")
    _save_debug_image(base64.b64decode(image_b64), f'gemini_output.{img_format}')

    # Strip image payloads from the text in BOTH forms; removing only the
    # markdown form left the whole base64 blob in the description whenever
    # the bare-data-URI fallback had matched.
    description = re.sub(r'!\[.*?\]\(data:image/[^)]+\)', '', result_content)
    description = re.sub(r'data:image/\w+;base64,[A-Za-z0-9+/=]+', '', description).strip()
    return f"data:image/{img_format};base64,{image_b64}", description


def _image_via_dashscope(images_b64: List[str], prompt: str, use_model: str) -> tuple:
    """Generate an image through the DashScope native multimodal-generation endpoint."""
    import requests as http_requests

    api_url = "https://dashscope.aliyuncs.com/api/v1/services/aigc/multimodal-generation/generation"
    content_parts = [{"image": f"data:image/jpeg;base64,{img_b64}"} for img_b64 in images_b64]
    content_parts.append({"text": prompt})

    payload = {
        "model": use_model,
        "input": {"messages": [{"role": "user", "content": content_parts}]},
        "parameters": {"n": 1, "watermark": False, "prompt_extend": True},
    }
    headers = {"Content-Type": "application/json", "Authorization": f"Bearer {settings.AI_API_KEY}"}

    log.info(f"{'='*60}")
    log.info(f"[ImageModel] DashScope 原生 API, 模型: {use_model}")
    log.info(f"[ImageModel] 图片数: {len(images_b64)}, 各图: {[len(b)//1024 for b in images_b64]}KB")
    log.info(f"[ImageModel] 提示词: {prompt[:200]}")

    for idx, img_b64 in enumerate(images_b64):
        _save_debug_image(base64.b64decode(img_b64), f'input_{idx}.jpg')

    resp = http_requests.post(api_url, json=payload, headers=headers, timeout=120)
    if resp.status_code != 200:
        error_data = {}
        if resp.headers.get("content-type", "").startswith("application/json"):
            # The header can claim JSON while the body is invalid or not an
            # object; fall back to the raw text in that case.
            try:
                parsed = resp.json()
            except ValueError:
                parsed = None
            if isinstance(parsed, dict):
                error_data = parsed
        err_msg = error_data.get("message", resp.text[:300])
        log.error(f"[ImageModel] API 错误: {resp.status_code} {err_msg}")
        if "data_inspection_failed" in str(error_data):
            raise ValueError("图片内容未通过安全审核,请更换图片")
        raise ValueError(f"图片模型调用失败({resp.status_code}): {err_msg}")

    data = resp.json()
    output = data.get("output") or {}
    choices = output.get("choices") or []
    if not choices:
        raise ValueError("模型未返回结果")

    content_list = choices[0].get("message", {}).get("content") or []
    image_url = None
    description = ""
    for item in content_list:
        if isinstance(item, dict):
            if "image" in item:
                image_url = item["image"]
            elif "text" in item:
                description += item["text"]

    if not image_url:
        raise ValueError("模型未返回图片")

    log.info(f"[ImageModel] 输出图片 URL: {image_url[:120]}...")
    # Best-effort debug copy of the output; a failure here must not fail the
    # call, but it is logged instead of being swallowed silently.
    try:
        out_resp = http_requests.get(image_url, timeout=60)
        if out_resp.status_code == 200:
            _save_debug_image(out_resp.content, 'output.png')
    except Exception as exc:
        log.warning(f"[ImageModel] 调试输出图片下载失败: {exc}")

    return image_url, description
|
||||
|
||||
|
||||
# ==================== 验证套图效果 ====================
|
||||
|
||||
def verify_pattern_result(garment_b64: str, canvas_b64: str, extra_prompt: str = None, vision_model: str = None) -> str:
    """Ask the vision model to compare the original garment photo with the applied-pattern canvas.

    Args:
        garment_b64: Base64 image of the original garment (sent first).
        canvas_b64: Base64 image of the pattern result (sent second).
        extra_prompt: Optional user remark appended to the verification prompt.
        vision_model: Model name used instead of ``settings.AI_VISION_MODEL``.

    Returns:
        The model's assessment text (``""`` when it returned no content).
    """
    use_model = vision_model or settings.AI_VISION_MODEL

    log.info(f"[Verify] 模型: {use_model}, 成衣: {len(garment_b64)//1024}KB, 画布: {len(canvas_b64)//1024}KB")

    verify_prompt = (
        "请对比这两张图片:第一张是原始成衣照片,第二张是套图结果。\n"
        "验证:1. 花样还原度 2. 裁片覆盖完整度 3. 对齐质量 4. 整体效果。给出评分(1-10)和改进建议。"
    )
    if extra_prompt:
        verify_prompt += f"\n用户补充:{extra_prompt}"

    system_message = {"role": "system", "content": "你是服装套图质量检验专家。"}

    if is_gemini_model(use_model):
        # The persona goes in a "system" turn, matching the non-Gemini route
        # below and the other call_gemini callers in this module (it was sent
        # as a second "user" turn before). call_gemini attaches both images to
        # the last user message.
        return call_gemini(
            [system_message, {"role": "user", "content": verify_prompt}],
            use_model, images_b64=[garment_b64, canvas_b64]
        )

    from openai import OpenAI
    client = OpenAI(api_key=settings.AI_API_KEY, base_url=settings.AI_BASE_URL or "https://api.openai.com/v1")
    completion = client.chat.completions.create(
        model=use_model,
        messages=[
            system_message,
            {"role": "user", "content": [
                {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{garment_b64}"}},
                {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{canvas_b64}"}},
                {"type": "text", "text": verify_prompt},
            ]},
        ],
    )
    return completion.choices[0].message.content or ""
|
||||
|
||||
|
||||
# ==================== Mock ====================
|
||||
|
||||
def mock_reply(message: str, has_image: bool = False) -> str:
    """Return a canned reply for use when no API key is configured.

    An image always gets the "analysis unavailable" notice. Otherwise a message
    mentioning "套图" gets a pointer to the preset page, and anything else gets
    the generic greeting.
    """
    if not has_image:
        if "套图" in message:
            return "套图功能在「参数预设」页面。先选择花样组和裁片组,再添加规则,最后点击生成。"
        return "你好!我是 DesignerCEP AI 助手。你可以问我关于套图、裁片、对齐等功能的问题。"
    return "【模拟分析】收到图片。AI 分析功能需要配置 AI_API_KEY。"
|
||||
|
||||
|
||||
# ==================== 工具函数 ====================
|
||||
|
||||
def _save_debug_image(data: bytes, filename: str):
    """Best-effort write of *data* to the ``debug_images`` directory.

    The target is ``debug_images`` three directory levels above this file, and
    the file name is prefixed with the current Unix timestamp in seconds.
    Failures are logged as warnings and never raised, so a debugging aid
    cannot break the model call that triggered it.

    Args:
        data: Raw image bytes to write.
        filename: Base file name; the timestamp prefix is added here.
    """
    import os
    import time

    debug_dir = os.path.join(os.path.dirname(__file__), '..', '..', '..', 'debug_images')
    path = os.path.join(debug_dir, f'{int(time.time())}_{filename}')
    try:
        # Directory creation sits inside the try as well: an unwritable
        # location previously raised straight into the caller.
        os.makedirs(debug_dir, exist_ok=True)
        with open(path, 'wb') as f:
            f.write(data)
        log.info(f"[Debug] 图片已保存: {path}")
    except Exception as e:  # deliberate catch-all: saving is debug-only
        log.warning(f"[Debug] 保存失败: {e}")
|
||||
Reference in New Issue
Block a user