feat: 65个PS操作工具 + ai_chat拆分 + 开发准则更新 - ps-operations.ts 1361行(图层/变换/选区/蒙版/调色/文字/文档管理) - ai_llm.py 模型调用层独立 - ai_tools.py 65个function calling schema - aiToolExecutor.ts 全量映射 - 开发准则新增AI模块规范

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-07 18:52:34 +08:00
parent dae906aba7
commit 8688422578
22 changed files with 1014 additions and 574 deletions
--- a/Server/app/api/v1/ai_llm.py
+++ b/Server/app/api/v1/ai_llm.py
@@ -0,0 +1,414 @@
+# -*- coding: utf-8 -*-
+"""
+AI 模型调用层
+统一管理所有 LLM / Vision / Image 模型的调用逻辑
+支持 Qwen (DashScope) 和 Gemini (第三方代理) 两套路由
+"""
+
+from typing import List
+import json, base64, re, logging
+from app.core.config import settings
+from app.api.v1.ai_tools import PS_TOOLS, TOOL_DISPLAY_NAMES
+
+log = logging.getLogger(__name__)
+
+# ==================== Prompts ====================
+
+SYSTEM_PROMPT = """你是 DesignerCEP 的 AI 助手，运行在 Adobe Photoshop CEP 插件中。
+
+你的能力：
+1. 回答关于 Photoshop 操作和插件使用的问题
+2. 通过工具直接操作 Photoshop（创建图层、对齐、查看文档信息等）
+3. 帮用户排查操作中遇到的错误
+4. **AI 智能套图**：两阶段流程 — 先生成预览确认，再提取套到裁片上
+
+## AI 智能套图流程（两阶段）
+
+当用户上传成衣图片并要求套图时：
+
+### 阶段 1 — 识别裁片 + 生成预览（需用户确认）
+1. 调用 **identify_pieces** — 截取画布并识别每个图层是什么裁片部位（前片、后片、袖子等）
+2. 告诉用户识别结果（如 "M-1=前片, M-2=后片, M-5=左袖..."）
+3. 调用 **generate_garment_preview** — 会自动在裁片下方标注名称标签（如 "M-1 前片"），然后截取带标签的画布 + 成衣照片一起发给 AI 生成预览
+4. 预览图显示在聊天中，每个裁片都有标签，**等用户确认 OK 后**进入阶段 2
+
+### 阶段 2 — 提取花样 + 正式套图（用户确认后）
+5. 根据 identify_pieces 分析结果中每个裁片的 type 决定处理方式：
+   - solid → 设 color 字段，PS 直接纯色填充
+   - fill_pattern → AI 提取花型铺满
+   - theme_pattern → 底层 PS 纯色填充（设 color）+ 上层 AI 提取主题图案（白底+正片叠底）
+   - mixed_pattern → 底层 AI 提取花型 + 上层 AI 提取主题图案（白底+正片叠底）
+6. 调用 **extract_and_apply_all_pieces** 执行套图
+7. 可选：调用 **verify_pattern_result** 验证效果
+
+重要：
+- 阶段 1 完成后必须**等用户说"可以"/"OK"**才执行阶段 2
+- 用户可能会说"袖子纯色"、"后幅不要花样"等，要根据 identify_pieces 的结果对应到正确的图层名
+
+重要规则：
+- 当用户要求执行 PS 操作时，使用工具完成
+- 执行操作前可以先了解当前文档和图层状态
+- 用简洁的中文回答，适合在小面板中阅读
+- 如果工具执行失败，向用户解释原因并建议解决方案
+"""
+
+VISION_PROMPT = """你是一位资深的服装设计分析师，同时也是 DesignerCEP 的 AI 助手。
+
+当用户发送服装/成衣图片时，请从以下维度进行专业分析：
+
+1. **服装类别** — 上衣/裤子/裙子/连衣裙/外套/配饰等，细分款式
+2. **面料分析** — 根据视觉特征推测面料类型（棉、涤纶、丝绸、针织、牛仔、雪纺等），分析面料质感
+3. **颜色与印花** — 主色调、配色方案、印花/图案类型及工艺（数码印花、丝网印刷、提花等）
+4. **版型特点** — 修身/宽松/A字/H型等，分析领口、袖型、肩线、腰线、下摆处理
+5. **工艺细节** — 缝线工艺、拉链/纽扣/暗扣、口袋设计、装饰细节、包边/锁边
+6. **设计评价** — 设计亮点、风格定位（休闲/正装/运动/时尚等）、目标消费群体
+7. **改进建议** — 如有可改进之处，给出专业建议
+
+规则：
+- 如果图片不是服装相关，也请尽力分析图片内容并给出有价值的反馈
+- 如果用户同时提了文字问题，请结合图片和问题一起回答
+- 用清晰、结构化的中文回答，适合在设计工作中参考
+- 回答要专业但不啰嗦，突出重点信息
+"""
+
+
+# ==================== 路由判断 ====================
+
+def is_gemini_model(model_name: str) -> bool:
+    """判断是否是 Gemini 模型"""
+    return bool(model_name) and "gemini" in model_name.lower()
+
+
+# ==================== Qwen / OpenAI 兼容调用 ====================
+
+def call_llm_with_tools(messages_history: List[dict], model_override: str = None):
+    """调用 LLM，支持 function calling"""
+    from openai import OpenAI
+
+    use_model = model_override or settings.AI_MODEL
+
+    client = OpenAI(
+        api_key=settings.AI_API_KEY,
+        base_url=settings.AI_BASE_URL or "https://api.openai.com/v1",
+    )
+
+    messages = [{"role": "system", "content": SYSTEM_PROMPT}] + messages_history
+
+    log.info(f"{'='*60}")
+    log.info(f"[LLM] 调用工具模型: {use_model}{' (override)' if model_override else ''}")
+    log.info(f"[LLM] 消息数量: {len(messages)} (system + {len(messages_history)} history)")
+    for i, m in enumerate(messages_history[-5:]):
+        role = m['role']
+        content = m['content'][:120] if m.get('content') else '(empty)'
+        log.info(f"[LLM] history[-{len(messages_history)-i}] {role}: {content}")
+    log.info(f"[LLM] 工具数量: {len(PS_TOOLS)}")
+
+    completion = client.chat.completions.create(
+        model=use_model,
+        messages=messages,
+        tools=PS_TOOLS,
+        tool_choice="auto",
+    )
+
+    choice = completion.choices[0]
+    message = choice.message
+
+    log.info(f"[LLM] 响应 finish_reason={choice.finish_reason}")
+    if message.content:
+        log.info(f"[LLM] 回复文本: {message.content[:150]}...")
+    if message.tool_calls:
+        for tc in message.tool_calls:
+            log.info(f"[LLM] 工具调用: {tc.function.name}({tc.function.arguments[:200]})")
+    else:
+        log.info(f"[LLM] 无工具调用")
+    log.info(f"{'='*60}")
+
+    if message.tool_calls and len(message.tool_calls) > 0:
+        tool_calls_data = []
+        for tc in message.tool_calls:
+            args = {}
+            if tc.function.arguments:
+                try:
+                    args = json.loads(tc.function.arguments)
+                except json.JSONDecodeError:
+                    args = {}
+            tool_calls_data.append({
+                "id": tc.id,
+                "name": tc.function.name,
+                "display_name": TOOL_DISPLAY_NAMES.get(tc.function.name, tc.function.name),
+                "args": args,
+                "status": "pending"
+            })
+        return message.content or "", tool_calls_data
+
+    return message.content or "", None
+
+
+# ==================== Gemini 调用（OpenAI 兼容代理） ====================
+
+def call_gemini(messages_history: List[dict], model: str, images_b64: List[str] = None) -> str:
+    """调用 Gemini（通过第三方代理，OpenAI 兼容格式）"""
+    from openai import OpenAI as _OpenAI
+
+    if not settings.GEMINI_API_KEY or not settings.GEMINI_BASE_URL:
+        raise ValueError("GEMINI_API_KEY 或 GEMINI_BASE_URL 未配置")
+
+    client = _OpenAI(
+        api_key=settings.GEMINI_API_KEY,
+        base_url=f"{settings.GEMINI_BASE_URL}/v1",
+    )
+
+    messages = []
+    for msg in messages_history:
+        role = msg.get("role", "user")
+        content = msg.get("content", "")
+        if role not in ("system", "user", "assistant"):
+            role = "user"
+        messages.append({"role": role, "content": content})
+
+    if images_b64:
+        last_user_idx = None
+        for i in range(len(messages) - 1, -1, -1):
+            if messages[i]["role"] == "user":
+                last_user_idx = i
+                break
+        if last_user_idx is not None:
+            text_content = messages[last_user_idx]["content"]
+            multimodal_content = []
+            for img_b64 in images_b64:
+                multimodal_content.append({
+                    "type": "image_url",
+                    "image_url": {"url": f"data:image/jpeg;base64,{img_b64}"}
+                })
+            multimodal_content.append({"type": "text", "text": text_content})
+            messages[last_user_idx]["content"] = multimodal_content
+
+    log.info(f"{'='*60}")
+    log.info(f"[Gemini] 调用模型: {model} (OpenAI 兼容)")
+    log.info(f"[Gemini] 消息数: {len(messages)}, 图片数: {len(images_b64) if images_b64 else 0}")
+
+    completion = client.chat.completions.create(model=model, messages=messages)
+    result = completion.choices[0].message.content or ""
+    log.info(f"[Gemini] 回复: {result[:200]}...")
+    return result
+
+
+def call_gemini_with_tools(messages_history: List[dict], model: str) -> tuple:
+    """用 Gemini 做对话（不支持 function calling）"""
+    full_history = [{"role": "system", "content": SYSTEM_PROMPT}] + messages_history
+
+    log.info(f"{'='*60}")
+    log.info(f"[Gemini] 调用对话模型: {model}")
+    for i, m in enumerate(messages_history[-3:]):
+        log.info(f"[Gemini] history[-{len(messages_history)-i}] {m['role']}: {str(m.get('content',''))[:120]}")
+
+    result = call_gemini(full_history, model)
+    log.info(f"[Gemini] 回复文本: {result[:150]}...")
+    return result, None
+
+
+# ==================== 视觉模型 ====================
+
+def call_vision_llm(user_message: str, image_base64: str, history: List[dict], model_override: str = None) -> str:
+    """调用视觉模型分析图片（自动路由 Qwen / Gemini）"""
+    use_model = model_override or settings.AI_VISION_MODEL
+
+    if is_gemini_model(use_model):
+        log.info(f"[Vision] 使用 Gemini 视觉模型: {use_model}")
+        msgs = [{"role": "system", "content": VISION_PROMPT}]
+        for h in history[-10:]:
+            role = h["role"] if h["role"] != "tool" else "user"
+            msgs.append({"role": role, "content": h["content"]})
+        msgs.append({"role": "user", "content": user_message})
+        return call_gemini(msgs, use_model, images_b64=[image_base64])
+
+    from openai import OpenAI
+    client = OpenAI(api_key=settings.AI_API_KEY, base_url=settings.AI_BASE_URL or "https://api.openai.com/v1")
+
+    user_content = [
+        {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"}},
+        {"type": "text", "text": user_message}
+    ]
+    messages = [{"role": "system", "content": VISION_PROMPT}]
+    for h in history[-10:]:
+        role = h["role"] if h["role"] != "tool" else "user"
+        messages.append({"role": role, "content": h["content"]})
+    messages.append({"role": "user", "content": user_content})
+
+    completion = client.chat.completions.create(model=use_model, messages=messages)
+    return completion.choices[0].message.content or ""
+
+
+# ==================== 图片编辑/生成模型 ====================
+
+def call_image_model(images_b64: List[str], prompt: str, model_override: str = None) -> tuple:
+    """调用图片编辑/生成模型（自动路由 DashScope / Gemini），返回 (url_or_datauri, description)"""
+    import requests as http_requests
+
+    use_model = model_override or settings.AI_IMAGE_EDIT_MODEL
+
+    # ---------- Gemini（OpenAI 兼容代理） ----------
+    if is_gemini_model(use_model):
+        log.info(f"[ImageModel] 使用 Gemini 图片模型: {use_model}")
+        if not settings.GEMINI_API_KEY or not settings.GEMINI_BASE_URL:
+            raise ValueError("GEMINI_API_KEY 或 GEMINI_BASE_URL 未配置")
+
+        from openai import OpenAI as _OpenAI
+        client = _OpenAI(api_key=settings.GEMINI_API_KEY, base_url=f"{settings.GEMINI_BASE_URL}/v1")
+
+        content_parts = [{"type": "text", "text": prompt}]
+        for img_b64 in images_b64:
+            content_parts.append({"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{img_b64}"}})
+
+        log.info(f"[ImageModel] Gemini OpenAI 兼容, 模型: {use_model}")
+        log.info(f"[ImageModel] 图片数: {len(images_b64)}, 各图: {[len(b)//1024 for b in images_b64]}KB")
+        log.info(f"[ImageModel] 提示词: {prompt[:200]}")
+
+        completion = client.chat.completions.create(model=use_model, messages=[{"role": "user", "content": content_parts}])
+        result_content = completion.choices[0].message.content or ""
+        log.info(f"[ImageModel] Gemini 回复长度: {len(result_content)} chars")
+
+        # 提取 base64 图片
+        match = re.search(r'!\[.*?\]\((data:image/(\w+);base64,([^)]+))\)', result_content)
+        if not match:
+            match = re.search(r'(data:image/(\w+);base64,([A-Za-z0-9+/=]+))', result_content)
+        if not match:
+            log.warning(f"[ImageModel] Gemini 响应中无图片，前500字: {result_content[:500]}")
+            raise ValueError("Gemini 未返回图片，请检查模型是否支持图片生成")
+
+        img_format = match.group(2)
+        image_b64 = match.group(3)
+        padding = 4 - len(image_b64) % 4
+        if padding != 4:
+            image_b64 += '=' * padding
+
+        log.info(f"[ImageModel] Gemini 返回图片: image/{img_format}, {len(image_b64)//1024}KB")
+        _save_debug_image(base64.b64decode(image_b64), f'gemini_output.{img_format}')
+
+        description = re.sub(r'!\[.*?\]\(data:image/[^)]+\)', '', result_content).strip()
+        return f"data:image/{img_format};base64,{image_b64}", description
+
+    # ---------- DashScope 原生接口 ----------
+    api_url = "https://dashscope.aliyuncs.com/api/v1/services/aigc/multimodal-generation/generation"
+    content_parts = []
+    for img_b64 in images_b64:
+        content_parts.append({"image": f"data:image/jpeg;base64,{img_b64}"})
+    content_parts.append({"text": prompt})
+
+    payload = {
+        "model": use_model,
+        "input": {"messages": [{"role": "user", "content": content_parts}]},
+        "parameters": {"n": 1, "watermark": False, "prompt_extend": True}
+    }
+    headers = {"Content-Type": "application/json", "Authorization": f"Bearer {settings.AI_API_KEY}"}
+
+    log.info(f"{'='*60}")
+    log.info(f"[ImageModel] DashScope 原生 API, 模型: {use_model}")
+    log.info(f"[ImageModel] 图片数: {len(images_b64)}, 各图: {[len(b)//1024 for b in images_b64]}KB")
+    log.info(f"[ImageModel] 提示词: {prompt[:200]}")
+
+    for idx, img_b64 in enumerate(images_b64):
+        _save_debug_image(base64.b64decode(img_b64), f'input_{idx}.jpg')
+
+    resp = http_requests.post(api_url, json=payload, headers=headers, timeout=120)
+    if resp.status_code != 200:
+        error_data = resp.json() if resp.headers.get("content-type", "").startswith("application/json") else {}
+        err_msg = error_data.get("message", resp.text[:300])
+        log.error(f"[ImageModel] API 错误: {resp.status_code} {err_msg}")
+        if "data_inspection_failed" in str(error_data):
+            raise ValueError("图片内容未通过安全审核，请更换图片")
+        raise ValueError(f"图片模型调用失败({resp.status_code}): {err_msg}")
+
+    data = resp.json()
+    output = data.get("output", {})
+    choices = output.get("choices", [])
+    if not choices:
+        raise ValueError("模型未返回结果")
+
+    content_list = choices[0].get("message", {}).get("content", [])
+    image_url = None
+    description = ""
+    for item in content_list:
+        if isinstance(item, dict):
+            if "image" in item:
+                image_url = item["image"]
+            elif "text" in item:
+                description += item["text"]
+
+    if not image_url:
+        raise ValueError("模型未返回图片")
+
+    log.info(f"[ImageModel] 输出图片 URL: {image_url[:120]}...")
+    try:
+        out_resp = http_requests.get(image_url, timeout=60)
+        if out_resp.status_code == 200:
+            _save_debug_image(out_resp.content, 'output.png')
+    except Exception:
+        pass
+
+    return image_url, description
+
+
+# ==================== 验证套图效果 ====================
+
+def verify_pattern_result(garment_b64: str, canvas_b64: str, extra_prompt: str = None, vision_model: str = None) -> str:
+    """用视觉模型对比原始成衣和套图结果"""
+    use_model = vision_model or settings.AI_VISION_MODEL
+
+    log.info(f"[Verify] 模型: {use_model}, 成衣: {len(garment_b64)//1024}KB, 画布: {len(canvas_b64)//1024}KB")
+
+    verify_prompt = (
+        "请对比这两张图片：第一张是原始成衣照片，第二张是套图结果。\n"
+        "验证：1. 花样还原度 2. 裁片覆盖完整度 3. 对齐质量 4. 整体效果。给出评分(1-10)和改进建议。"
+    )
+    if extra_prompt:
+        verify_prompt += f"\n用户补充：{extra_prompt}"
+
+    if is_gemini_model(use_model):
+        return call_gemini(
+            [{"role": "user", "content": "你是服装套图质量检验专家。"}, {"role": "user", "content": verify_prompt}],
+            use_model, images_b64=[garment_b64, canvas_b64]
+        )
+
+    from openai import OpenAI
+    client = OpenAI(api_key=settings.AI_API_KEY, base_url=settings.AI_BASE_URL or "https://api.openai.com/v1")
+    completion = client.chat.completions.create(
+        model=use_model,
+        messages=[
+            {"role": "system", "content": "你是服装套图质量检验专家。"},
+            {"role": "user", "content": [
+                {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{garment_b64}"}},
+                {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{canvas_b64}"}},
+                {"type": "text", "text": verify_prompt},
+            ]},
+        ],
+    )
+    return completion.choices[0].message.content or ""
+
+
+# ==================== Mock ====================
+
+def mock_reply(message: str, has_image: bool = False) -> str:
+    """未配置 API Key 时的模拟回复"""
+    if has_image:
+        return "【模拟分析】收到图片。AI 分析功能需要配置 AI_API_KEY。"
+    if "套图" in message:
+        return "套图功能在「参数预设」页面。先选择花样组和裁片组，再添加规则，最后点击生成。"
+    return "你好！我是 DesignerCEP AI 助手。你可以问我关于套图、裁片、对齐等功能的问题。"
+
+
+# ==================== 工具函数 ====================
+
+def _save_debug_image(data: bytes, filename: str):
+    """保存调试图片到 debug_images 目录"""
+    import os, time
+    debug_dir = os.path.join(os.path.dirname(__file__), '..', '..', '..', 'debug_images')
+    os.makedirs(debug_dir, exist_ok=True)
+    ts = int(time.time())
+    path = os.path.join(debug_dir, f'{ts}_{filename}')
+    try:
+        with open(path, 'wb') as f:
+            f.write(data)
+        log.info(f"[Debug] 图片已保存: {path}")
+    except Exception as e:
+        log.warning(f"[Debug] 保存失败: {e}")