diff --git a/Server/app/api/v1/ai_chat.py b/Server/app/api/v1/ai_chat.py index 5558909..c67ce46 100644 --- a/Server/app/api/v1/ai_chat.py +++ b/Server/app/api/v1/ai_chat.py @@ -15,6 +15,12 @@ from app.db import get_db from app.models.user import User from app.models.chat import ChatSession, ChatMessage from app.api.v1.ai_tools import PS_TOOLS, TOOL_DISPLAY_NAMES +from app.api.v1.ai_llm import ( + SYSTEM_PROMPT, VISION_PROMPT, + is_gemini_model, call_llm_with_tools, call_gemini_with_tools, + call_vision_llm, call_gemini, call_image_model, + verify_pattern_result, mock_reply, +) log = logging.getLogger(__name__) # 防止 uvicorn --reload 导致 handler 重复 @@ -61,65 +67,6 @@ class VerifyResultRequest(BaseModel): prompt: Optional[str] = None vision_model: Optional[str] = None -# ==================== System Prompt ==================== - -SYSTEM_PROMPT = """你是 DesignerCEP 的 AI 助手,运行在 Adobe Photoshop CEP 插件中。 - -你的能力: -1. 回答关于 Photoshop 操作和插件使用的问题 -2. 通过工具直接操作 Photoshop(创建图层、对齐、查看文档信息等) -3. 帮用户排查操作中遇到的错误 -4. **AI 智能套图**:两阶段流程 — 先生成预览确认,再提取套到裁片上 - -## AI 智能套图流程(两阶段) - -当用户上传成衣图片并要求套图时: - -### 阶段 1 — 识别裁片 + 生成预览(需用户确认) -1. 调用 **identify_pieces** — 截取画布并识别每个图层是什么裁片部位(前片、后片、袖子等) -2. 告诉用户识别结果(如 "M-1=前片, M-2=后片, M-5=左袖...") -3. 调用 **generate_garment_preview** — 会自动在裁片下方标注名称标签(如 "M-1 前片"),然后截取带标签的画布 + 成衣照片一起发给 AI 生成预览 -4. 预览图显示在聊天中,每个裁片都有标签,**等用户确认 OK 后**进入阶段 2 - -### 阶段 2 — 提取花样 + 正式套图(用户确认后) -5. 根据用户反馈决定每个裁片的处理方式: - - 需要花样的裁片 → extract_and_apply_all_pieces 中不设 color 字段 - - 纯色的裁片(如后幅) → 设置 color 字段为对应颜色值(如 "#F5E6D0"),直接 PS 填充不调 AI -6. 调用 **extract_and_apply_all_pieces** 执行套图 -7. 
可选:调用 **verify_pattern_result** 验证效果 - -重要: -- 阶段 1 完成后必须**等用户说"可以"/"OK"**才执行阶段 2 -- 用户可能会说"袖子纯色"、"后幅不要花样"等,要根据 identify_pieces 的结果对应到正确的图层名 -- 纯色裁片用 color 字段直接填充,不要浪费 AI 提取调用 - -重要规则: -- 当用户要求执行 PS 操作时,使用工具完成 -- 执行操作前可以先了解当前文档和图层状态 -- 用简洁的中文回答,适合在小面板中阅读 -- 如果工具执行失败,向用户解释原因并建议解决方案 -- 套图时应一次性完成「生成 → 套图 → 验证」全流程,不要中途停下等待用户指令 -""" - -VISION_PROMPT = """你是一位资深的服装设计分析师,同时也是 DesignerCEP 的 AI 助手。 - -当用户发送服装/成衣图片时,请从以下维度进行专业分析: - -1. **服装类别** — 上衣/裤子/裙子/连衣裙/外套/配饰等,细分款式 -2. **面料分析** — 根据视觉特征推测面料类型(棉、涤纶、丝绸、针织、牛仔、雪纺等),分析面料质感 -3. **颜色与印花** — 主色调、配色方案、印花/图案类型及工艺(数码印花、丝网印刷、提花等) -4. **版型特点** — 修身/宽松/A字/H型等,分析领口、袖型、肩线、腰线、下摆处理 -5. **工艺细节** — 缝线工艺、拉链/纽扣/暗扣、口袋设计、装饰细节、包边/锁边 -6. **设计评价** — 设计亮点、风格定位(休闲/正装/运动/时尚等)、目标消费群体 -7. **改进建议** — 如有可改进之处,给出专业建议 - -规则: -- 如果图片不是服装相关,也请尽力分析图片内容并给出有价值的反馈 -- 如果用户同时提了文字问题,请结合图片和问题一起回答 -- 用清晰、结构化的中文回答,适合在设计工作中参考 -- 回答要专业但不啰嗦,突出重点信息 -""" - # ==================== 对话管理接口 ==================== @router.get("/ai/models") @@ -274,8 +221,8 @@ async def chat( # 4. 调用 LLM(统一走工具模型,图片信息通过文本标记传递) try: - if not settings.AI_API_KEY and not (data.model and _is_gemini_model(data.model)): - reply_content = _mock_reply(data.message, has_image) + if not settings.AI_API_KEY and not (data.model and is_gemini_model(data.model)): + reply_content = mock_reply(data.message, has_image) tool_calls_data = None else: call_history = history_list @@ -287,11 +234,11 @@ async def chat( }] # 根据模型类型路由 - if data.model and _is_gemini_model(data.model): + if data.model and is_gemini_model(data.model): log.info(f"[Chat] 路由到 Gemini: {data.model}") - reply_content, tool_calls_data = _call_gemini_with_tools(call_history, data.model) + reply_content, tool_calls_data = call_gemini_with_tools(call_history, data.model) else: - reply_content, tool_calls_data = _call_llm_with_tools(call_history, model_override=data.model) + reply_content, tool_calls_data = call_llm_with_tools(call_history, model_override=data.model) except Exception as e: reply_content = f"AI 请求出错: {str(e)}" tool_calls_data = None @@ 
-348,15 +295,15 @@ async def submit_tool_result( # 3. 再次调用 LLM(这次不带 tools,让 AI 总结结果) try: - if not settings.AI_API_KEY and not (data.model and _is_gemini_model(data.model)): + if not settings.AI_API_KEY and not (data.model and is_gemini_model(data.model)): reply_content = f"工具 {data.tool_name} 执行完成。" tool_calls_data = None else: - if data.model and _is_gemini_model(data.model): + if data.model and is_gemini_model(data.model): log.info(f"[ToolResult] 路由到 Gemini: {data.model}") - reply_content, tool_calls_data = _call_gemini_with_tools(history_list, data.model) + reply_content, tool_calls_data = call_gemini_with_tools(history_list, data.model) else: - reply_content, tool_calls_data = _call_llm_with_tools(history_list, model_override=data.model) + reply_content, tool_calls_data = call_llm_with_tools(history_list, model_override=data.model) except Exception as e: reply_content = f"AI 总结出错: {str(e)}" tool_calls_data = None @@ -379,235 +326,6 @@ async def submit_tool_result( } -# ==================== LLM 调用 ==================== - -def _call_llm_with_tools(messages_history: List[dict], model_override: str = None): - """调用 LLM,支持 function calling""" - from openai import OpenAI - - use_model = model_override or settings.AI_MODEL - - client = OpenAI( - api_key=settings.AI_API_KEY, - base_url=settings.AI_BASE_URL or "https://api.openai.com/v1", - ) - - messages = [{"role": "system", "content": SYSTEM_PROMPT}] + messages_history - - # ---- 日志:请求详情 ---- - log.info(f"{'='*60}") - log.info(f"[LLM] 调用工具模型: {use_model}{' (override)' if model_override else ''}") - log.info(f"[LLM] 消息数量: {len(messages)} (system + {len(messages_history)} history)") - for i, m in enumerate(messages_history[-5:]): # 只打最近 5 条 - role = m['role'] - content = m['content'][:120] if m.get('content') else '(empty)' - log.info(f"[LLM] history[-{len(messages_history)-i}] {role}: {content}") - log.info(f"[LLM] 工具数量: {len(PS_TOOLS)}") - - completion = client.chat.completions.create( - model=use_model, - 
messages=messages, - tools=PS_TOOLS, - tool_choice="auto", - ) - - choice = completion.choices[0] - message = choice.message - - # ---- 日志:响应详情 ---- - log.info(f"[LLM] 响应 finish_reason={choice.finish_reason}") - if message.content: - log.info(f"[LLM] 回复文本: {message.content[:150]}...") - if message.tool_calls: - for tc in message.tool_calls: - log.info(f"[LLM] 工具调用: {tc.function.name}({tc.function.arguments[:200]})") - else: - log.info(f"[LLM] 无工具调用") - log.info(f"{'='*60}") - - # 检查是否有工具调用 - if message.tool_calls and len(message.tool_calls) > 0: - tool_calls_data = [] - for tc in message.tool_calls: - args = {} - if tc.function.arguments: - try: - args = json.loads(tc.function.arguments) - except json.JSONDecodeError: - args = {} - - tool_calls_data.append({ - "id": tc.id, - "name": tc.function.name, - "display_name": TOOL_DISPLAY_NAMES.get(tc.function.name, tc.function.name), - "args": args, - "status": "pending" - }) - - return message.content or "", tool_calls_data - - # 普通文本回复 - return message.content or "", None - - -# ==================== Gemini 调用 ==================== - -def _is_gemini_model(model_name: str) -> bool: - """判断是否是 Gemini 模型""" - return model_name and "gemini" in model_name.lower() - - -def _call_gemini(messages_history: List[dict], model: str, images_b64: List[str] = None) -> str: - """ - 调用 Gemini API(通过第三方代理,OpenAI 兼容格式) - 支持纯文本对话和图片输入(视觉分析) - 返回文本内容 - """ - from openai import OpenAI as _OpenAI - - if not settings.GEMINI_API_KEY or not settings.GEMINI_BASE_URL: - raise ValueError("GEMINI_API_KEY 或 GEMINI_BASE_URL 未配置") - - client = _OpenAI( - api_key=settings.GEMINI_API_KEY, - base_url=f"{settings.GEMINI_BASE_URL}/v1", - ) - - # 构造 OpenAI 格式消息 - messages = [] - for msg in messages_history: - role = msg.get("role", "user") - content = msg.get("content", "") - # OpenAI 格式支持 system / user / assistant - if role not in ("system", "user", "assistant"): - role = "user" - messages.append({"role": role, "content": content}) - - # 如果有图片,把最后一条 user 
消息改成多模态格式 - if images_b64: - # 找到最后一条 user 消息 - last_user_idx = None - for i in range(len(messages) - 1, -1, -1): - if messages[i]["role"] == "user": - last_user_idx = i - break - - if last_user_idx is not None: - text_content = messages[last_user_idx]["content"] - multimodal_content = [] - # 先放图片 - for img_b64 in images_b64: - multimodal_content.append({ - "type": "image_url", - "image_url": {"url": f"data:image/jpeg;base64,{img_b64}"} - }) - # 再放文本 - multimodal_content.append({"type": "text", "text": text_content}) - messages[last_user_idx]["content"] = multimodal_content - - log.info(f"{'='*60}") - log.info(f"[Gemini] 调用模型: {model} (OpenAI 兼容)") - log.info(f"[Gemini] 消息数: {len(messages)}, 图片数: {len(images_b64) if images_b64 else 0}") - - completion = client.chat.completions.create( - model=model, - messages=messages, - ) - - result = completion.choices[0].message.content or "" - log.info(f"[Gemini] 回复: {result[:200]}...") - return result - - -def _call_gemini_with_tools(messages_history: List[dict], model: str) -> tuple: - """ - 用 Gemini 做对话(不支持 function calling,靠 prompt 引导调用工具) - 返回 (content, tool_calls_data) — tool_calls_data 始终为 None - 注意:Gemini 不原生支持 function calling,但文本对话正常 - """ - # 加入 system prompt - full_history = [{"role": "system", "content": SYSTEM_PROMPT}] + messages_history - - log.info(f"{'='*60}") - log.info(f"[Gemini] 调用对话模型: {model}") - for i, m in enumerate(messages_history[-3:]): - log.info(f"[Gemini] history[-{len(messages_history)-i}] {m['role']}: {str(m.get('content',''))[:120]}") - - result = _call_gemini(full_history, model) - - log.info(f"[Gemini] 回复文本: {result[:150]}...") - log.info(f"[Gemini] 无工具调用(Gemini 不支持 function calling)") - - return result, None - - -def _call_vision_llm(user_message: str, image_base64: str, history: List[dict], model_override: str = None) -> str: - """调用视觉模型分析图片(自动路由 Qwen / Gemini)""" - - use_model = model_override or settings.AI_VISION_MODEL - - # ---------- Gemini 路由 ---------- - if 
_is_gemini_model(use_model): - log.info(f"[Vision] 使用 Gemini 视觉模型: {use_model}") - msgs = [{"role": "system", "content": VISION_PROMPT}] - for h in history[-10:]: - role = h["role"] if h["role"] != "tool" else "user" - msgs.append({"role": role, "content": h["content"]}) - msgs.append({"role": "user", "content": user_message}) - return _call_gemini(msgs, use_model, images_b64=[image_base64]) - - # ---------- Qwen / OpenAI 路由 ---------- - from openai import OpenAI - - client = OpenAI( - api_key=settings.AI_API_KEY, - base_url=settings.AI_BASE_URL or "https://api.openai.com/v1", - ) - - # 构造多模态消息 - user_content = [ - { - "type": "image_url", - "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"} - }, - { - "type": "text", - "text": user_message - } - ] - - # 组装消息列表(历史 + 当前图片消息) - messages = [ - {"role": "system", "content": VISION_PROMPT}, - ] - # 添加近期历史(纯文本) - for h in history[-10:]: - role = h["role"] if h["role"] != "tool" else "user" - messages.append({"role": role, "content": h["content"]}) - # 当前带图片的用户消息 - messages.append({"role": "user", "content": user_content}) - - completion = client.chat.completions.create( - model=use_model, - messages=messages, - ) - - return completion.choices[0].message.content or "" - - -def _mock_reply(message: str, has_image: bool = False) -> str: - """未配置 API Key 时的模拟回复""" - if has_image: - return "【模拟分析】收到图片。AI 分析功能需要配置 AI_API_KEY 和 AI_VISION_MODEL。\n\n配置后可以分析:服装类别、面料、颜色印花、版型、工艺细节等。" - if "套图" in message: - return "套图功能在「参数预设」页面。先选择花样组和裁片组,再添加规则,最后点击生成。" - elif "对齐" in message: - return "图层对齐功能在「参数预设」页面顶部。支持上下左右居中对齐、领口对齐等。" - elif "裁片" in message or "PLT" in message: - return "PLT 裁片处理在「PLT 裁片处理」页面。上传 PLT 文件,选择尺码,点击开始处理。" - else: - return "你好!我是 DesignerCEP AI 助手。你可以问我关于套图、裁片、对齐等功能的问题。" - # ==================== 图案生成 & 验证 ==================== @@ -638,7 +356,7 @@ async def generate_preview( "请从这件成衣中提取面料花样,生成一张干净的花样平铺图。" ) - result_url, desc = _call_image_model( + result_url, desc = call_image_model( images_b64=images, 
prompt=data.prompt or default_prompt, model_override=data.image_edit_model, @@ -755,7 +473,7 @@ async def refine_piece( log.info(f"[RefinePiece] 提示词: {prompt[:200]}") - result_url, desc = _call_image_model( + result_url, desc = call_image_model( images_b64=[data.cropped_base64], prompt=prompt, model_override=data.image_edit_model, @@ -844,7 +562,7 @@ async def extract_piece_pattern( f"将该区域的花样输出为一张完整的矩形图片。" f"要求:只保留花样内容,去掉轮廓线,填满整个矩形,保持清晰。" ) - result_url, desc = _call_image_model( + result_url, desc = call_image_model( images_b64=[data.preview_base64], prompt=prompt, model_override=getattr(data, 'image_edit_model', None), @@ -940,14 +658,14 @@ type 只有四种: 只返回 JSON。""" # ---------- Gemini 路由 ---------- - if _is_gemini_model(use_model): + if is_gemini_model(use_model): log.info(f"[IdentifyPieces] 使用 Gemini 视觉: {use_model}") images = [] if garment_b64: images.append(garment_b64) images.append(canvas_b64) - content = _call_gemini( + content = call_gemini( [{"role": "user", "content": prompt}], use_model, images_b64=images @@ -1006,265 +724,10 @@ async def verify_result( raise HTTPException(400, "AI_API_KEY 未配置") try: - feedback = _verify_pattern_result(data.garment_base64, data.canvas_base64, data.prompt, vision_model=data.vision_model) + feedback = verify_pattern_result(data.garment_base64, data.canvas_base64, data.prompt, vision_model=data.vision_model) return {"code": 200, "data": {"feedback": feedback}} except Exception as e: log.error(f"验证失败: {e}", exc_info=True) raise HTTPException(500, f"验证失败: {str(e)}") -# ==================== 公共:图片模型调用 + 响应解析 ==================== - -def _call_image_model(images_b64: List[str], prompt: str, model_override: str = None) -> tuple: - """ - 调用图片编辑/生成模型(自动路由 DashScope / Gemini) - 返回 (result_url_or_base64, description) - """ - import requests as http_requests - - use_model = model_override or settings.AI_IMAGE_EDIT_MODEL - - # ---------- Gemini 图片生成路由(OpenAI 兼容格式,走代理) ---------- - if _is_gemini_model(use_model): - 
log.info(f"[ImageModel] 使用 Gemini 图片模型: {use_model}") - - if not settings.GEMINI_API_KEY or not settings.GEMINI_BASE_URL: - raise ValueError("GEMINI_API_KEY 或 GEMINI_BASE_URL 未配置") - - from openai import OpenAI as _OpenAI - - # 第三方代理走 OpenAI 兼容接口(/v1/chat/completions) - client = _OpenAI( - api_key=settings.GEMINI_API_KEY, - base_url=f"{settings.GEMINI_BASE_URL}/v1", - ) - - # 构造 OpenAI 格式的多模态消息(和正常调用一样) - content_parts = [{"type": "text", "text": prompt}] - for img_b64 in images_b64: - content_parts.append({ - "type": "image_url", - "image_url": {"url": f"data:image/jpeg;base64,{img_b64}"} - }) - - log.info(f"[ImageModel] Gemini OpenAI 兼容模式") - log.info(f"[ImageModel] 模型: {use_model}") - log.info(f"[ImageModel] 图片数: {len(images_b64)}, 各图大小: {[len(b)//1024 for b in images_b64]}KB") - log.info(f"[ImageModel] 提示词: {prompt[:200]}") - - completion = client.chat.completions.create( - model=use_model, - messages=[{"role": "user", "content": content_parts}], - ) - - result_content = completion.choices[0].message.content or "" - log.info(f"[ImageModel] Gemini 回复长度: {len(result_content)} chars") - - # 从 markdown 中提取 base64 图片:![...](data:image/...;base64,...) 
- import re - match = re.search(r'!\[.*?\]\((data:image/(\w+);base64,([^)]+))\)', result_content) - - if not match: - # 也尝试直接匹配 data URI(有些响应不带 markdown 格式) - match2 = re.search(r'(data:image/(\w+);base64,([A-Za-z0-9+/=]+))', result_content) - if match2: - match = match2 - - if not match: - log.warning(f"[ImageModel] Gemini 响应中无图片,前500字: {result_content[:500]}") - raise ValueError("Gemini 未返回图片,请检查模型是否支持图片生成") - - data_uri = match.group(1) - img_format = match.group(2) - image_b64 = match.group(3) - - # base64 填充修正 - padding = 4 - len(image_b64) % 4 - if padding != 4: - image_b64 += '=' * padding - - log.info(f"[ImageModel] Gemini 返回图片: image/{img_format}, {len(image_b64)//1024}KB") - - # 保存调试图片 - import os, time as _time - debug_dir = os.path.join(os.path.dirname(__file__), '..', '..', '..', 'debug_images') - os.makedirs(debug_dir, exist_ok=True) - ts = int(_time.time()) - debug_path = os.path.join(debug_dir, f'{ts}_gemini_output.{img_format}') - try: - with open(debug_path, 'wb') as f: - f.write(base64.b64decode(image_b64)) - log.info(f"[ImageModel] Gemini 输出图片已保存: {debug_path}") - except Exception as e: - log.warning(f"[ImageModel] 保存调试图片失败: {e}") - - # 提取文本描述(去掉图片部分) - description = re.sub(r'!\[.*?\]\(data:image/[^)]+\)', '', result_content).strip() - - # 返回 data URI(前端可直接用于 ) - image_data_uri = f"data:image/{img_format};base64,{image_b64}" - log.info(f"[ImageModel] Gemini 生成完成") - return image_data_uri, description - - # ---------- DashScope 原生接口 ---------- - # DashScope 图片编辑模型用原生接口,不用 /compatible-mode - api_url = "https://dashscope.aliyuncs.com/api/v1/services/aigc/multimodal-generation/generation" - - # 构造 DashScope 原生格式的消息 - content_parts = [] - for img_b64 in images_b64: - content_parts.append({"image": f"data:image/jpeg;base64,{img_b64}"}) - content_parts.append({"text": prompt}) - - payload = { - "model": use_model, - "input": { - "messages": [{ - "role": "user", - "content": content_parts - }] - }, - "parameters": { - "n": 1, - "watermark": False, - 
"prompt_extend": True, - } - } - - headers = { - "Content-Type": "application/json", - "Authorization": f"Bearer {settings.AI_API_KEY}", - } - - log.info(f"{'='*60}") - log.info(f"[ImageModel] 调用 DashScope 原生 API") - log.info(f"[ImageModel] 模型: {use_model}") - log.info(f"[ImageModel] 图片数: {len(images_b64)}, 各图大小: {[len(b)//1024 for b in images_b64]}KB") - log.info(f"[ImageModel] 提示词: {prompt[:200]}") - log.info(f"[ImageModel] 端点: {api_url}") - - # 调试:把发给模型的图片保存到磁盘 - import os - debug_dir = os.path.join(os.path.dirname(__file__), '..', '..', '..', 'debug_images') - os.makedirs(debug_dir, exist_ok=True) - import time - ts = int(time.time()) - for idx, img_b64 in enumerate(images_b64): - debug_path = os.path.join(debug_dir, f'{ts}_input_{idx}.jpg') - try: - with open(debug_path, 'wb') as f: - f.write(base64.b64decode(img_b64)) - log.info(f"[ImageModel] 调试图片已保存: {debug_path}") - except Exception as e: - log.warning(f"[ImageModel] 保存调试图片失败: {e}") - - resp = http_requests.post(api_url, json=payload, headers=headers, timeout=120) - - if resp.status_code != 200: - error_data = resp.json() if resp.headers.get("content-type", "").startswith("application/json") else {} - err_msg = error_data.get("message", resp.text[:300]) - err_code = error_data.get("code", "") - log.error(f"[ImageModel] API 错误: {resp.status_code} {err_code} {err_msg}") - if "data_inspection_failed" in str(error_data): - raise ValueError("图片内容未通过安全审核,请更换图片") - raise ValueError(f"图片模型调用失败({resp.status_code}): {err_msg}") - - data = resp.json() - log.info(f"[ImageModel] 响应 keys: {list(data.keys())}") - - # 解析响应:output.choices[0].message.content[].image - output = data.get("output", {}) - choices = output.get("choices", []) - - if not choices: - log.warning(f"[ImageModel] 无 choices: {str(data)[:500]}") - raise ValueError("模型未返回结果") - - content_list = choices[0].get("message", {}).get("content", []) - result_b64 = None - description = "" - - image_url = None - for item in content_list: - if isinstance(item, 
dict): - if "image" in item: - image_url = item["image"] - log.info(f"[ImageModel] 获取到图片 URL: {image_url[:100]}...") - elif "text" in item: - description += item["text"] - - if not image_url: - log.warning(f"[ImageModel] content 中无图片: {str(content_list)[:500]}") - raise ValueError("模型未返回图片") - - # 调试:保存输出图片 URL - log.info(f"[ImageModel] 输出图片 URL: {image_url[:120]}...") - try: - import os, time - debug_dir = os.path.join(os.path.dirname(__file__), '..', '..', '..', 'debug_images') - ts = int(time.time()) - out_resp = http_requests.get(image_url, timeout=60) - if out_resp.status_code == 200: - debug_path = os.path.join(debug_dir, f'{ts}_output.png') - with open(debug_path, 'wb') as f: - f.write(out_resp.content) - log.info(f"[ImageModel] 输出图片已保存: {debug_path}") - except Exception as e: - log.warning(f"[ImageModel] 保存输出图片失败: {e}") - - return image_url, description - - -def _verify_pattern_result(garment_b64: str, canvas_b64: str, extra_prompt: str = None, vision_model: str = None) -> str: - """用视觉模型对比原始成衣和套图结果(自动路由 Qwen/Gemini)""" - - use_model = vision_model or settings.AI_VISION_MODEL - - log.info(f"{'='*60}") - log.info(f"[Verify] 调用视觉模型验证套图效果") - log.info(f"[Verify] 模型: {use_model}") - log.info(f"[Verify] 成衣图: {len(garment_b64)//1024}KB, 画布图: {len(canvas_b64)//1024}KB") - if extra_prompt: - log.info(f"[Verify] 用户补充: {extra_prompt[:100]}") - - verify_prompt = ( - "请对比这两张图片:第一张是原始成衣照片,第二张是套图结果。\n" - "验证:1. 花样还原度(颜色/比例/方向)2. 裁片覆盖完整度 " - "3. 对齐质量 4. 
整体效果。给出评分(1-10)和具体改进建议。" - ) - if extra_prompt: - verify_prompt += f"\n用户补充:{extra_prompt}" - - # ---------- Gemini 路由 ---------- - if _is_gemini_model(use_model): - log.info(f"[Verify] 使用 Gemini 视觉: {use_model}") - return _call_gemini( - [ - {"role": "user", "content": "你是服装套图质量检验专家。"}, - {"role": "user", "content": verify_prompt}, - ], - use_model, - images_b64=[garment_b64, canvas_b64] - ) - - # ---------- Qwen / OpenAI 路由 ---------- - from openai import OpenAI - - client = OpenAI( - api_key=settings.AI_API_KEY, - base_url=settings.AI_BASE_URL or "https://api.openai.com/v1", - ) - - completion = client.chat.completions.create( - model=use_model, - messages=[ - {"role": "system", "content": "你是服装套图质量检验专家。"}, - {"role": "user", "content": [ - {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{garment_b64}"}}, - {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{canvas_b64}"}}, - {"type": "text", "text": verify_prompt}, - ]}, - ], - ) - - return completion.choices[0].message.content or "" diff --git a/Server/app/api/v1/ai_llm.py b/Server/app/api/v1/ai_llm.py new file mode 100644 index 0000000..e0a7cf7 --- /dev/null +++ b/Server/app/api/v1/ai_llm.py @@ -0,0 +1,414 @@ +# -*- coding: utf-8 -*- +""" +AI 模型调用层 +统一管理所有 LLM / Vision / Image 模型的调用逻辑 +支持 Qwen (DashScope) 和 Gemini (第三方代理) 两套路由 +""" + +from typing import List +import json, base64, re, logging +from app.core.config import settings +from app.api.v1.ai_tools import PS_TOOLS, TOOL_DISPLAY_NAMES + +log = logging.getLogger(__name__) + +# ==================== Prompts ==================== + +SYSTEM_PROMPT = """你是 DesignerCEP 的 AI 助手,运行在 Adobe Photoshop CEP 插件中。 + +你的能力: +1. 回答关于 Photoshop 操作和插件使用的问题 +2. 通过工具直接操作 Photoshop(创建图层、对齐、查看文档信息等) +3. 帮用户排查操作中遇到的错误 +4. **AI 智能套图**:两阶段流程 — 先生成预览确认,再提取套到裁片上 + +## AI 智能套图流程(两阶段) + +当用户上传成衣图片并要求套图时: + +### 阶段 1 — 识别裁片 + 生成预览(需用户确认) +1. 调用 **identify_pieces** — 截取画布并识别每个图层是什么裁片部位(前片、后片、袖子等) +2. 告诉用户识别结果(如 "M-1=前片, M-2=后片, M-5=左袖...") +3. 
调用 **generate_garment_preview** — 会自动在裁片下方标注名称标签(如 "M-1 前片"),然后截取带标签的画布 + 成衣照片一起发给 AI 生成预览 +4. 预览图显示在聊天中,每个裁片都有标签,**等用户确认 OK 后**进入阶段 2 + +### 阶段 2 — 提取花样 + 正式套图(用户确认后) +5. 根据 identify_pieces 分析结果中每个裁片的 type 决定处理方式: + - solid → 设 color 字段,PS 直接纯色填充 + - fill_pattern → AI 提取花型铺满 + - theme_pattern → 底层 PS 纯色填充(设 color)+ 上层 AI 提取主题图案(白底+正片叠底) + - mixed_pattern → 底层 AI 提取花型 + 上层 AI 提取主题图案(白底+正片叠底) +6. 调用 **extract_and_apply_all_pieces** 执行套图 +7. 可选:调用 **verify_pattern_result** 验证效果 + +重要: +- 阶段 1 完成后必须**等用户说"可以"/"OK"**才执行阶段 2 +- 用户可能会说"袖子纯色"、"后幅不要花样"等,要根据 identify_pieces 的结果对应到正确的图层名 + +重要规则: +- 当用户要求执行 PS 操作时,使用工具完成 +- 执行操作前可以先了解当前文档和图层状态 +- 用简洁的中文回答,适合在小面板中阅读 +- 如果工具执行失败,向用户解释原因并建议解决方案 +""" + +VISION_PROMPT = """你是一位资深的服装设计分析师,同时也是 DesignerCEP 的 AI 助手。 + +当用户发送服装/成衣图片时,请从以下维度进行专业分析: + +1. **服装类别** — 上衣/裤子/裙子/连衣裙/外套/配饰等,细分款式 +2. **面料分析** — 根据视觉特征推测面料类型(棉、涤纶、丝绸、针织、牛仔、雪纺等),分析面料质感 +3. **颜色与印花** — 主色调、配色方案、印花/图案类型及工艺(数码印花、丝网印刷、提花等) +4. **版型特点** — 修身/宽松/A字/H型等,分析领口、袖型、肩线、腰线、下摆处理 +5. **工艺细节** — 缝线工艺、拉链/纽扣/暗扣、口袋设计、装饰细节、包边/锁边 +6. **设计评价** — 设计亮点、风格定位(休闲/正装/运动/时尚等)、目标消费群体 +7. 
**改进建议** — 如有可改进之处,给出专业建议 + +规则: +- 如果图片不是服装相关,也请尽力分析图片内容并给出有价值的反馈 +- 如果用户同时提了文字问题,请结合图片和问题一起回答 +- 用清晰、结构化的中文回答,适合在设计工作中参考 +- 回答要专业但不啰嗦,突出重点信息 +""" + + +# ==================== 路由判断 ==================== + +def is_gemini_model(model_name: str) -> bool: + """判断是否是 Gemini 模型""" + return bool(model_name) and "gemini" in model_name.lower() + + +# ==================== Qwen / OpenAI 兼容调用 ==================== + +def call_llm_with_tools(messages_history: List[dict], model_override: str = None): + """调用 LLM,支持 function calling""" + from openai import OpenAI + + use_model = model_override or settings.AI_MODEL + + client = OpenAI( + api_key=settings.AI_API_KEY, + base_url=settings.AI_BASE_URL or "https://api.openai.com/v1", + ) + + messages = [{"role": "system", "content": SYSTEM_PROMPT}] + messages_history + + log.info(f"{'='*60}") + log.info(f"[LLM] 调用工具模型: {use_model}{' (override)' if model_override else ''}") + log.info(f"[LLM] 消息数量: {len(messages)} (system + {len(messages_history)} history)") + for i, m in enumerate(messages_history[-5:]): + role = m['role'] + content = m['content'][:120] if m.get('content') else '(empty)' + log.info(f"[LLM] history[-{len(messages_history)-i}] {role}: {content}") + log.info(f"[LLM] 工具数量: {len(PS_TOOLS)}") + + completion = client.chat.completions.create( + model=use_model, + messages=messages, + tools=PS_TOOLS, + tool_choice="auto", + ) + + choice = completion.choices[0] + message = choice.message + + log.info(f"[LLM] 响应 finish_reason={choice.finish_reason}") + if message.content: + log.info(f"[LLM] 回复文本: {message.content[:150]}...") + if message.tool_calls: + for tc in message.tool_calls: + log.info(f"[LLM] 工具调用: {tc.function.name}({tc.function.arguments[:200]})") + else: + log.info(f"[LLM] 无工具调用") + log.info(f"{'='*60}") + + if message.tool_calls and len(message.tool_calls) > 0: + tool_calls_data = [] + for tc in message.tool_calls: + args = {} + if tc.function.arguments: + try: + args = json.loads(tc.function.arguments) + except 
json.JSONDecodeError: + args = {} + tool_calls_data.append({ + "id": tc.id, + "name": tc.function.name, + "display_name": TOOL_DISPLAY_NAMES.get(tc.function.name, tc.function.name), + "args": args, + "status": "pending" + }) + return message.content or "", tool_calls_data + + return message.content or "", None + + +# ==================== Gemini 调用(OpenAI 兼容代理) ==================== + +def call_gemini(messages_history: List[dict], model: str, images_b64: List[str] = None) -> str: + """调用 Gemini(通过第三方代理,OpenAI 兼容格式)""" + from openai import OpenAI as _OpenAI + + if not settings.GEMINI_API_KEY or not settings.GEMINI_BASE_URL: + raise ValueError("GEMINI_API_KEY 或 GEMINI_BASE_URL 未配置") + + client = _OpenAI( + api_key=settings.GEMINI_API_KEY, + base_url=f"{settings.GEMINI_BASE_URL}/v1", + ) + + messages = [] + for msg in messages_history: + role = msg.get("role", "user") + content = msg.get("content", "") + if role not in ("system", "user", "assistant"): + role = "user" + messages.append({"role": role, "content": content}) + + if images_b64: + last_user_idx = None + for i in range(len(messages) - 1, -1, -1): + if messages[i]["role"] == "user": + last_user_idx = i + break + if last_user_idx is not None: + text_content = messages[last_user_idx]["content"] + multimodal_content = [] + for img_b64 in images_b64: + multimodal_content.append({ + "type": "image_url", + "image_url": {"url": f"data:image/jpeg;base64,{img_b64}"} + }) + multimodal_content.append({"type": "text", "text": text_content}) + messages[last_user_idx]["content"] = multimodal_content + + log.info(f"{'='*60}") + log.info(f"[Gemini] 调用模型: {model} (OpenAI 兼容)") + log.info(f"[Gemini] 消息数: {len(messages)}, 图片数: {len(images_b64) if images_b64 else 0}") + + completion = client.chat.completions.create(model=model, messages=messages) + result = completion.choices[0].message.content or "" + log.info(f"[Gemini] 回复: {result[:200]}...") + return result + + +def call_gemini_with_tools(messages_history: List[dict], model: 
str) -> tuple: + """用 Gemini 做对话(不支持 function calling)""" + full_history = [{"role": "system", "content": SYSTEM_PROMPT}] + messages_history + + log.info(f"{'='*60}") + log.info(f"[Gemini] 调用对话模型: {model}") + for i, m in enumerate(messages_history[-3:]): + log.info(f"[Gemini] history[-{len(messages_history)-i}] {m['role']}: {str(m.get('content',''))[:120]}") + + result = call_gemini(full_history, model) + log.info(f"[Gemini] 回复文本: {result[:150]}...") + return result, None + + +# ==================== 视觉模型 ==================== + +def call_vision_llm(user_message: str, image_base64: str, history: List[dict], model_override: str = None) -> str: + """调用视觉模型分析图片(自动路由 Qwen / Gemini)""" + use_model = model_override or settings.AI_VISION_MODEL + + if is_gemini_model(use_model): + log.info(f"[Vision] 使用 Gemini 视觉模型: {use_model}") + msgs = [{"role": "system", "content": VISION_PROMPT}] + for h in history[-10:]: + role = h["role"] if h["role"] != "tool" else "user" + msgs.append({"role": role, "content": h["content"]}) + msgs.append({"role": "user", "content": user_message}) + return call_gemini(msgs, use_model, images_b64=[image_base64]) + + from openai import OpenAI + client = OpenAI(api_key=settings.AI_API_KEY, base_url=settings.AI_BASE_URL or "https://api.openai.com/v1") + + user_content = [ + {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"}}, + {"type": "text", "text": user_message} + ] + messages = [{"role": "system", "content": VISION_PROMPT}] + for h in history[-10:]: + role = h["role"] if h["role"] != "tool" else "user" + messages.append({"role": role, "content": h["content"]}) + messages.append({"role": "user", "content": user_content}) + + completion = client.chat.completions.create(model=use_model, messages=messages) + return completion.choices[0].message.content or "" + + +# ==================== 图片编辑/生成模型 ==================== + +def call_image_model(images_b64: List[str], prompt: str, model_override: str = None) -> tuple: + 
"""调用图片编辑/生成模型(自动路由 DashScope / Gemini),返回 (url_or_datauri, description)""" + import requests as http_requests + + use_model = model_override or settings.AI_IMAGE_EDIT_MODEL + + # ---------- Gemini(OpenAI 兼容代理) ---------- + if is_gemini_model(use_model): + log.info(f"[ImageModel] 使用 Gemini 图片模型: {use_model}") + if not settings.GEMINI_API_KEY or not settings.GEMINI_BASE_URL: + raise ValueError("GEMINI_API_KEY 或 GEMINI_BASE_URL 未配置") + + from openai import OpenAI as _OpenAI + client = _OpenAI(api_key=settings.GEMINI_API_KEY, base_url=f"{settings.GEMINI_BASE_URL}/v1") + + content_parts = [{"type": "text", "text": prompt}] + for img_b64 in images_b64: + content_parts.append({"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{img_b64}"}}) + + log.info(f"[ImageModel] Gemini OpenAI 兼容, 模型: {use_model}") + log.info(f"[ImageModel] 图片数: {len(images_b64)}, 各图: {[len(b)//1024 for b in images_b64]}KB") + log.info(f"[ImageModel] 提示词: {prompt[:200]}") + + completion = client.chat.completions.create(model=use_model, messages=[{"role": "user", "content": content_parts}]) + result_content = completion.choices[0].message.content or "" + log.info(f"[ImageModel] Gemini 回复长度: {len(result_content)} chars") + + # 提取 base64 图片 + match = re.search(r'!\[.*?\]\((data:image/(\w+);base64,([^)]+))\)', result_content) + if not match: + match = re.search(r'(data:image/(\w+);base64,([A-Za-z0-9+/=]+))', result_content) + if not match: + log.warning(f"[ImageModel] Gemini 响应中无图片,前500字: {result_content[:500]}") + raise ValueError("Gemini 未返回图片,请检查模型是否支持图片生成") + + img_format = match.group(2) + image_b64 = match.group(3) + padding = 4 - len(image_b64) % 4 + if padding != 4: + image_b64 += '=' * padding + + log.info(f"[ImageModel] Gemini 返回图片: image/{img_format}, {len(image_b64)//1024}KB") + _save_debug_image(base64.b64decode(image_b64), f'gemini_output.{img_format}') + + description = re.sub(r'!\[.*?\]\(data:image/[^)]+\)', '', result_content).strip() + return 
f"data:image/{img_format};base64,{image_b64}", description + + # ---------- DashScope 原生接口 ---------- + api_url = "https://dashscope.aliyuncs.com/api/v1/services/aigc/multimodal-generation/generation" + content_parts = [] + for img_b64 in images_b64: + content_parts.append({"image": f"data:image/jpeg;base64,{img_b64}"}) + content_parts.append({"text": prompt}) + + payload = { + "model": use_model, + "input": {"messages": [{"role": "user", "content": content_parts}]}, + "parameters": {"n": 1, "watermark": False, "prompt_extend": True} + } + headers = {"Content-Type": "application/json", "Authorization": f"Bearer {settings.AI_API_KEY}"} + + log.info(f"{'='*60}") + log.info(f"[ImageModel] DashScope 原生 API, 模型: {use_model}") + log.info(f"[ImageModel] 图片数: {len(images_b64)}, 各图: {[len(b)//1024 for b in images_b64]}KB") + log.info(f"[ImageModel] 提示词: {prompt[:200]}") + + for idx, img_b64 in enumerate(images_b64): + _save_debug_image(base64.b64decode(img_b64), f'input_{idx}.jpg') + + resp = http_requests.post(api_url, json=payload, headers=headers, timeout=120) + if resp.status_code != 200: + error_data = resp.json() if resp.headers.get("content-type", "").startswith("application/json") else {} + err_msg = error_data.get("message", resp.text[:300]) + log.error(f"[ImageModel] API 错误: {resp.status_code} {err_msg}") + if "data_inspection_failed" in str(error_data): + raise ValueError("图片内容未通过安全审核,请更换图片") + raise ValueError(f"图片模型调用失败({resp.status_code}): {err_msg}") + + data = resp.json() + output = data.get("output", {}) + choices = output.get("choices", []) + if not choices: + raise ValueError("模型未返回结果") + + content_list = choices[0].get("message", {}).get("content", []) + image_url = None + description = "" + for item in content_list: + if isinstance(item, dict): + if "image" in item: + image_url = item["image"] + elif "text" in item: + description += item["text"] + + if not image_url: + raise ValueError("模型未返回图片") + + log.info(f"[ImageModel] 输出图片 URL: 
# ==================== Verify pattern-application result ====================

def verify_pattern_result(garment_b64: str, canvas_b64: str, extra_prompt: str = None, vision_model: str = None) -> str:
    """Ask a vision model to compare the garment photo with the rendered canvas.

    Args:
        garment_b64: Base64 payload of the original garment photo, without a
            ``data:`` prefix (the prefix is added here for the OpenAI path).
        canvas_b64: Base64 payload of the canvas after the pattern was applied.
        extra_prompt: Optional user remark appended to the built-in prompt;
            ``None`` or an empty string is ignored.
        vision_model: Optional model override; defaults to
            ``settings.AI_VISION_MODEL``.

    Returns:
        The model's textual assessment. On the OpenAI-compatible path an
        empty string is returned when the message has no content; on the
        Gemini path the value is whatever ``call_gemini`` returns.
    """
    use_model = vision_model or settings.AI_VISION_MODEL

    log.info(f"[Verify] 模型: {use_model}, 成衣: {len(garment_b64)//1024}KB, 画布: {len(canvas_b64)//1024}KB")

    verify_prompt = (
        "请对比这两张图片:第一张是原始成衣照片,第二张是套图结果。\n"
        "验证:1. 花样还原度 2. 裁片覆盖完整度 3. 对齐质量 4. 整体效果。给出评分(1-10)和改进建议。"
    )
    if extra_prompt:
        verify_prompt += f"\n用户补充:{extra_prompt}"

    if is_gemini_model(use_model):
        # NOTE(review): the inspector persona is sent as a "user" turn here but
        # as a "system" message on the OpenAI path below -- confirm whether
        # call_gemini accepts a system role before unifying the two.
        return call_gemini(
            [{"role": "user", "content": "你是服装套图质量检验专家。"}, {"role": "user", "content": verify_prompt}],
            use_model, images_b64=[garment_b64, canvas_b64]
        )

    # Imported lazily so the module can be loaded without the openai package
    # when only the Gemini path is exercised.
    from openai import OpenAI
    client = OpenAI(api_key=settings.AI_API_KEY, base_url=settings.AI_BASE_URL or "https://api.openai.com/v1")
    completion = client.chat.completions.create(
        model=use_model,
        messages=[
            {"role": "system", "content": "你是服装套图质量检验专家。"},
            {"role": "user", "content": [
                # Image order matters: the prompt calls the first image the
                # garment photo and the second the rendered result.
                {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{garment_b64}"}},
                {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{canvas_b64}"}},
                {"type": "text", "text": verify_prompt},
            ]},
        ],
    )
    return completion.choices[0].message.content or ""


# ==================== Mock ====================

def mock_reply(message: str, has_image: bool = False) -> str:
    """Return a canned reply for use when no AI API key is configured.

    Args:
        message: The user's text; ``None`` or empty falls through to the
            generic greeting.
        has_image: When true, the image notice wins over any text matching.

    Returns:
        One of three fixed strings: the image notice, the pattern-feature
        hint (when the text mentions "套图"), or the generic greeting.
    """
    if has_image:
        return "【模拟分析】收到图片。AI 分析功能需要配置 AI_API_KEY。"
    # Guard against a None message so this fallback can never raise TypeError.
    if message and "套图" in message:
        return "套图功能在「参数预设」页面。先选择花样组和裁片组,再添加规则,最后点击生成。"
    return "你好!我是 DesignerCEP AI 助手。你可以问我关于套图、裁片、对齐等功能的问题。"


# ==================== Utilities ====================

def _save_debug_image(data: bytes, filename: str) -> None:
    """Best-effort dump of image bytes into the ``debug_images`` directory.

    The file is written as ``<unix_seconds>_<filename>`` three directory
    levels above this module. Any failure (including failure to create the
    directory) is logged as a warning and never propagated, so a debugging
    aid cannot break the calling model request.

    Args:
        data: Raw image bytes to write.
        filename: Base file name (with extension) appended to the timestamp.
    """
    import os
    import time

    # Normalise so the logged path does not contain "..".
    debug_dir = os.path.abspath(
        os.path.join(os.path.dirname(__file__), '..', '..', '..', 'debug_images')
    )
    path = os.path.join(debug_dir, f'{int(time.time())}_{filename}')
    try:
        # Directory creation is inside the guard: a read-only or
        # permission-restricted deployment must not abort the caller.
        os.makedirs(debug_dir, exist_ok=True)
        with open(path, 'wb') as f:
            f.write(data)
    except Exception as e:  # deliberate best-effort boundary for a debug aid
        log.warning(f"[Debug] 保存失败: {e}")
    else:
        log.info(f"[Debug] 图片已保存: {path}")
"string", "enum": ["horizontal", "vertical"], "description": "翻转方向"} + }, "required": ["name", "direction"]} + }}, + {"type": "function", "function": { + "name": "set_layer_opacity", + "description": "设置图层透明度(0=完全透明,100=完全不透明)", + "parameters": {"type": "object", "properties": { + "name": {"type": "string", "description": "图层名称"}, + "opacity": {"type": "number", "description": "透明度 0-100"} + }, "required": ["name", "opacity"]} + }}, + {"type": "function", "function": { + "name": "set_layer_blend_mode", + "description": "设置图层混合模式", + "parameters": {"type": "object", "properties": { + "name": {"type": "string", "description": "图层名称"}, + "mode": {"type": "string", "enum": ["normal","multiply","screen","overlay","darken","lighten","color_dodge","color_burn","soft_light","hard_light","difference","exclusion","hue","saturation","color","luminosity"], "description": "混合模式"} + }, "required": ["name", "mode"]} + }}, + {"type": "function", "function": { + "name": "set_layer_visible", + "description": "显示或隐藏图层", + "parameters": {"type": "object", "properties": { + "name": {"type": "string", "description": "图层名称"}, + "visible": {"type": "boolean", "description": "true=显示,false=隐藏"} + }, "required": ["name", "visible"]} + }}, + {"type": "function", "function": { + "name": "set_text_content", + "description": "修改文字图层的文本内容", + "parameters": {"type": "object", "properties": { + "name": {"type": "string", "description": "文字图层名称"}, + "text": {"type": "string", "description": "新的文本内容"} + }, "required": ["name", "text"]} + }}, + {"type": "function", "function": { + "name": "set_text_color", + "description": "修改文字图层的颜色", + "parameters": {"type": "object", "properties": { + "name": {"type": "string", "description": "文字图层名称"}, + "color": {"type": "string", "description": "颜色 hex 值,如 #FF0000"} + }, "required": ["name", "color"]} + }}, + {"type": "function", "function": { + "name": "set_text_size", + "description": "修改文字图层的字号(像素)", + "parameters": {"type": "object", "properties": { + 
"name": {"type": "string", "description": "文字图层名称"}, + "size": {"type": "number", "description": "字号(像素)"} + }, "required": ["name", "size"]} + }}, + {"type": "function", "function": { + "name": "group_layers", + "description": "将多个图层编组", + "parameters": {"type": "object", "properties": { + "layer_names": {"type": "array", "items": {"type": "string"}, "description": "要编组的图层名称列表"}, + "group_name": {"type": "string", "description": "组名称"} + }, "required": ["layer_names", "group_name"]} + }}, + {"type": "function", "function": { + "name": "merge_visible", + "description": "合并所有可见图层", + "parameters": {"type": "object", "properties": {}, "required": []} + }}, + {"type": "function", "function": { + "name": "flatten_image", + "description": "拼合图像(所有图层合并为背景层)", + "parameters": {"type": "object", "properties": {}, "required": []} + }}, + {"type": "function", "function": { + "name": "resize_canvas", + "description": "调整画布大小(像素)", + "parameters": {"type": "object", "properties": { + "width": {"type": "number", "description": "画布宽度(像素)"}, + "height": {"type": "number", "description": "画布高度(像素)"}, + "anchor": {"type": "string", "enum": ["top-left","top","top-right","left","center","right","bottom-left","bottom","bottom-right"], "description": "锚点位置(默认 center)"} + }, "required": ["width", "height"]} + }}, + {"type": "function", "function": { + "name": "adjust_brightness_contrast", + "description": "调整当前图层的亮度和对比度", + "parameters": {"type": "object", "properties": { + "brightness": {"type": "number", "description": "亮度 -150 到 150"}, + "contrast": {"type": "number", "description": "对比度 -50 到 100"} + }, "required": ["brightness", "contrast"]} + }}, + {"type": "function", "function": { + "name": "desaturate", + "description": "将当前图层去色(变为灰度)", + "parameters": {"type": "object", "properties": {}, "required": []} + }}, + {"type": "function", "function": { + "name": "rasterize_layer", + "description": "栅格化图层(将智能对象/文字/形状转为像素图层)", + "parameters": {"type": "object", "properties": { + 
"name": {"type": "string", "description": "图层名称"} + }, "required": ["name"]} + }}, + {"type": "function", "function": { + "name": "align_layers_tool", + "description": "对齐多个图层", + "parameters": {"type": "object", "properties": { + "layer_names": {"type": "array", "items": {"type": "string"}, "description": "要对齐的图层名"}, + "alignment": {"type": "string", "enum": ["left","center_h","right","top","center_v","bottom"], "description": "对齐方式"} + }, "required": ["layer_names", "alignment"]} + }}, + {"type": "function", "function": { + "name": "distribute_layers", + "description": "等距分布多个图层", + "parameters": {"type": "object", "properties": { + "layer_names": {"type": "array", "items": {"type": "string"}, "description": "要分布的图层名"}, + "direction": {"type": "string", "enum": ["horizontal","vertical"], "description": "分布方向"} + }, "required": ["layer_names", "direction"]} + }}, + {"type": "function", "function": { + "name": "add_stroke", + "description": "给图层添加描边效果", + "parameters": {"type": "object", "properties": { + "name": {"type": "string", "description": "图层名称"}, + "size": {"type": "number", "description": "描边宽度(像素)"}, + "color": {"type": "string", "description": "描边颜色 hex,如 #FF0000"}, + "position": {"type": "string", "enum": ["outside","inside","center"], "description": "描边位置(默认 outside)"} + }, "required": ["name", "size", "color"]} + }}, + {"type": "function", "function": { + "name": "create_clipping_mask", + "description": "为图层创建剪贴蒙版(裁切到下方图层形状)", + "parameters": {"type": "object", "properties": { + "name": {"type": "string", "description": "图层名称"} + }, "required": ["name"]} + }}, + {"type": "function", "function": { + "name": "release_clipping_mask", + "description": "释放图层的剪贴蒙版", + "parameters": {"type": "object", "properties": { + "name": {"type": "string", "description": "图层名称"} + }, "required": ["name"]} + }}, + {"type": "function", "function": { + "name": "undo", + "description": "撤销操作(Ctrl+Z),可指定步数", + "parameters": {"type": "object", "properties": { + "steps": 
{"type": "number", "description": "撤销步数(默认 1)"} + }, "required": []} + }}, + {"type": "function", "function": { + "name": "get_layer_bounds", + "description": "获取图层的位置和尺寸信息(left/top/width/height/center)", + "parameters": {"type": "object", "properties": { + "name": {"type": "string", "description": "图层名称"} + }, "required": ["name"]} + }}, + {"type": "function", "function": { + "name": "create_document", + "description": "新建 Photoshop 文档", + "parameters": {"type": "object", "properties": { + "width": {"type": "number", "description": "宽度(像素)"}, + "height": {"type": "number", "description": "高度(像素)"}, + "resolution": {"type": "number", "description": "分辨率 DPI(默认 150)"}, + "name": {"type": "string", "description": "文档名称"} + }, "required": ["width", "height"]} + }}, + {"type": "function", "function": { + "name": "save_document", + "description": "保存当前文档(Ctrl+S)", + "parameters": {"type": "object", "properties": {}, "required": []} + }}, + {"type": "function", "function": { + "name": "resize_image", + "description": "调整图像大小(像素/分辨率)", + "parameters": {"type": "object", "properties": { + "width": {"type": "number", "description": "新宽度(像素)"}, + "height": {"type": "number", "description": "新高度(像素)"}, + "resolution": {"type": "number", "description": "新分辨率 DPI(可选)"} + }, "required": ["width", "height"]} + }}, + {"type": "function", "function": { + "name": "gaussian_blur", + "description": "对当前图层应用高斯模糊", + "parameters": {"type": "object", "properties": { + "radius": {"type": "number", "description": "模糊半径(像素)"} + }, "required": ["radius"]} + }}, + {"type": "function", "function": { + "name": "auto_levels", + "description": "自动色阶(快速修正色调范围)", + "parameters": {"type": "object", "properties": {}, "required": []} + }}, + {"type": "function", "function": { + "name": "auto_contrast", + "description": "自动对比度", + "parameters": {"type": "object", "properties": {}, "required": []} + }}, + {"type": "function", "function": { + "name": "invert_colors", + "description": "反相(颜色取反)", + 
"parameters": {"type": "object", "properties": {}, "required": []} + }}, + {"type": "function", "function": { + "name": "convert_to_rgb", + "description": "转换为 RGB 色彩模式", + "parameters": {"type": "object", "properties": {}, "required": []} + }}, + {"type": "function", "function": { + "name": "convert_to_cmyk", + "description": "转换为 CMYK 色彩模式(印刷用)", + "parameters": {"type": "object", "properties": {}, "required": []} + }}, + {"type": "function", "function": { + "name": "add_drop_shadow", + "description": "给图层添加投影效果", + "parameters": {"type": "object", "properties": { + "name": {"type": "string", "description": "图层名称"}, + "distance": {"type": "number", "description": "投影距离(像素)"}, + "size": {"type": "number", "description": "投影大小/模糊(像素)"}, + "opacity": {"type": "number", "description": "投影不透明度 0-100(默认 75)"}, + "angle": {"type": "number", "description": "光照角度(默认 120)"} + }, "required": ["name", "distance", "size"]} + }}, + {"type": "function", "function": { + "name": "clear_layer_effects", + "description": "清除图层的所有图层样式(投影/描边/发光等)", + "parameters": {"type": "object", "properties": { + "name": {"type": "string", "description": "图层名称"} + }, "required": ["name"]} + }}, + {"type": "function", "function": { + "name": "bring_to_front", + "description": "将图层移到最顶层", + "parameters": {"type": "object", "properties": { + "name": {"type": "string", "description": "图层名称"} + }, "required": ["name"]} + }}, + {"type": "function", "function": { + "name": "send_to_back", + "description": "将图层移到最底层", + "parameters": {"type": "object", "properties": { + "name": {"type": "string", "description": "图层名称"} + }, "required": ["name"]} + }}, + {"type": "function", "function": { + "name": "crop_document", + "description": "裁切画布到指定区域(像素坐标)", + "parameters": {"type": "object", "properties": { + "left": {"type": "number"}, "top": {"type": "number"}, + "right": {"type": "number"}, "bottom": {"type": "number"} + }, "required": ["left", "top", "right", "bottom"]} + }}, + {"type": "function", 
"function": { + "name": "trim_document", + "description": "自动裁切(去除透明或纯色边缘)", + "parameters": {"type": "object", "properties": { + "type": {"type": "string", "enum": ["transparent", "topleft"], "description": "裁切依据(默认 topleft 即左上角颜色)"} + }, "required": []} + }}, + {"type": "function", "function": { + "name": "add_guide", + "description": "添加参考线", + "parameters": {"type": "object", "properties": { + "position": {"type": "number", "description": "位置(像素)"}, + "direction": {"type": "string", "enum": ["horizontal", "vertical"], "description": "方向"} + }, "required": ["position", "direction"]} + }}, + {"type": "function", "function": { + "name": "clear_guides", + "description": "清除所有参考线", + "parameters": {"type": "object", "properties": {}, "required": []} + }}, + {"type": "function", "function": { + "name": "fill_selection", + "description": "用颜色填充当前选区", + "parameters": {"type": "object", "properties": { + "color": {"type": "string", "description": "填充颜色 hex"}, + "opacity": {"type": "number", "description": "填充不透明度 0-100(默认 100)"} + }, "required": ["color"]} + }}, + {"type": "function", "function": { + "name": "inverse_selection", + "description": "反选(选中未选区域)", + "parameters": {"type": "object", "properties": {}, "required": []} + }}, + {"type": "function", "function": { + "name": "feather_selection", + "description": "羽化选区边缘", + "parameters": {"type": "object", "properties": { + "radius": {"type": "number", "description": "羽化半径(像素)"} + }, "required": ["radius"]} + }}, + {"type": "function", "function": { + "name": "adjust_levels", + "description": "调整色阶(输入黑点/灰度系数/白点)", + "parameters": {"type": "object", "properties": { + "black": {"type": "number", "description": "输入黑点 0-253(默认 0)"}, + "gamma": {"type": "number", "description": "灰度系数 0.1-9.99(默认 1.0,<1变暗 >1变亮)"}, + "white": {"type": "number", "description": "输入白点 2-255(默认 255)"} + }, "required": ["black", "gamma", "white"]} + }}, + {"type": "function", "function": { + "name": "copy_merged_to_new_layer", + "description": 
"复制合并所有可见内容到新图层(Ctrl+Shift+C + 粘贴)", + "parameters": {"type": "object", "properties": {}, "required": []} + }}, + {"type": "function", "function": { + "name": "lock_layer", + "description": "锁定或解锁图层", + "parameters": {"type": "object", "properties": { + "name": {"type": "string", "description": "图层名称"}, + "locked": {"type": "boolean", "description": "true=锁定,false=解锁"} + }, "required": ["name", "locked"]} + }}, + {"type": "function", "function": { + "name": "set_foreground_color", + "description": "设置前景色", + "parameters": {"type": "object", "properties": { + "color": {"type": "string", "description": "颜色 hex 值"} + }, "required": ["color"]} + }}, + {"type": "function", "function": { + "name": "save_document_as", + "description": "文件另存为(支持 PSD/PNG/JPG)", + "parameters": {"type": "object", "properties": { + "path": {"type": "string", "description": "保存路径"}, + "format": {"type": "string", "enum": ["psd", "png", "jpg"], "description": "文件格式"} + }, "required": ["path"]} + }}, + {"type": "function", "function": { + "name": "add_layer_mask", + "description": "为图层添加蒙版(全白=全显示 / 全黑=全隐藏 / 基于当前选区)", + "parameters": {"type": "object", "properties": { + "name": {"type": "string", "description": "图层名称"}, + "hide_all": {"type": "boolean", "description": "true=全黑蒙版(隐藏),false=全白蒙版(显示,默认)"} + }, "required": ["name"]} + }}, + {"type": "function", "function": { + "name": "delete_layer_mask", + "description": "删除图层蒙版", + "parameters": {"type": "object", "properties": { + "name": {"type": "string", "description": "图层名称"}, + "apply": {"type": "boolean", "description": "true=应用蒙版后删除,false=直接丢弃(默认)"} + }, "required": ["name"]} + }}, + {"type": "function", "function": { + "name": "convert_to_smart_object", + "description": "将图层转为智能对象", + "parameters": {"type": "object", "properties": { + "name": {"type": "string", "description": "图层名称"} + }, "required": ["name"]} + }}, + {"type": "function", "function": { + "name": "adjust_hsl", + "description": "调整色相/饱和度/明度(H=-180~180, S=-100~100, 
L=-100~100)", + "parameters": {"type": "object", "properties": { + "hue": {"type": "number", "description": "色相偏移 -180到180"}, + "saturation": {"type": "number", "description": "饱和度 -100到100"}, + "lightness": {"type": "number", "description": "明度 -100到100"} + }, "required": ["hue", "saturation", "lightness"]} + }}, + {"type": "function", "function": { + "name": "create_text_layer", + "description": "创建文字图层", + "parameters": {"type": "object", "properties": { + "text": {"type": "string", "description": "文字内容"}, + "x": {"type": "number", "description": "X 坐标(像素)"}, + "y": {"type": "number", "description": "Y 坐标(像素)"}, + "font_size": {"type": "number", "description": "字号(像素)"}, + "color": {"type": "string", "description": "颜色 hex(默认黑色)"}, + "font_name": {"type": "string", "description": "字体名称(可选)"} + }, "required": ["text", "x", "y", "font_size"]} + }}, + {"type": "function", "function": { + "name": "get_active_layer_info", + "description": "获取当前选中图层的详细信息(名称/类型/尺寸/位置/透明度/混合模式等)", + "parameters": {"type": "object", "properties": {}, "required": []} + }}, + {"type": "function", "function": { + "name": "list_all_layer_names", + "description": "列出文档中所有图层名称(扁平列表,含类型和可见性)", + "parameters": {"type": "object", "properties": {}, "required": []} + }}, + {"type": "function", "function": { + "name": "duplicate_layer_to_document", + "description": "复制图层到另一个已打开的文档", + "parameters": {"type": "object", "properties": { + "name": {"type": "string", "description": "图层名称"}, + "target_doc": {"type": "string", "description": "目标文档名称"} + }, "required": ["name", "target_doc"]} + }}, + {"type": "function", "function": { + "name": "switch_document", + "description": "切换到另一个已打开的文档", + "parameters": {"type": "object", "properties": { + "doc_name": {"type": "string", "description": "文档名称"} + }, "required": ["doc_name"]} + }}, + {"type": "function", "function": { + "name": "list_documents", + "description": "列出所有已打开的文档", + "parameters": {"type": "object", "properties": {}, "required": []} + }}, + 
{"type": "function", "function": { + "name": "get_pixel_color", + "description": "获取画布上指定坐标的像素颜色(取色器)", + "parameters": {"type": "object", "properties": { + "x": {"type": "number", "description": "X 坐标"}, + "y": {"type": "number", "description": "Y 坐标"} + }, "required": ["x", "y"]} + }}, + {"type": "function", "function": { + "name": "trim_document", + "description": "自动裁切(去除透明或纯色边缘)", + "parameters": {"type": "object", "properties": { + "type": {"type": "string", "enum": ["transparent","topleft"], "description": "裁切依据"} + }, "required": []} + }}, + {"type": "function", "function": { + "name": "close_document", + "description": "关闭当前文档", + "parameters": {"type": "object", "properties": { + "save": {"type": "boolean", "description": "是否保存(默认不保存)"} + }, "required": []} + }}, ] # 工具名称映射(中文显示用) @@ -236,4 +689,66 @@ TOOL_DISPLAY_NAMES = { "generate_garment_preview": "生成花样预览", "extract_and_apply_all_pieces": "提取并套图", "verify_pattern_result": "验证套图效果", + # PS 通用操作 + "move_layer": "移动图层", + "resize_layer": "缩放图层", + "rotate_layer": "旋转图层", + "flip_layer": "翻转图层", + "set_layer_opacity": "设置透明度", + "set_layer_blend_mode": "设置混合模式", + "set_layer_visible": "显示/隐藏图层", + "set_text_content": "修改文字", + "set_text_color": "修改文字颜色", + "set_text_size": "修改字号", + "group_layers": "编组图层", + "merge_visible": "合并可见", + "flatten_image": "拼合图像", + "resize_canvas": "调整画布", + "adjust_brightness_contrast": "亮度/对比度", + "desaturate": "去色", + "rasterize_layer": "栅格化", + "align_layers_tool": "对齐图层", + "distribute_layers": "分布图层", + "add_stroke": "添加描边", + "create_clipping_mask": "创建剪贴蒙版", + "release_clipping_mask": "释放剪贴蒙版", + "undo": "撤销", + "get_layer_bounds": "获取图层位置", + "create_document": "新建文档", + "save_document": "保存文档", + "resize_image": "调整图像大小", + "gaussian_blur": "高斯模糊", + "auto_levels": "自动色阶", + "auto_contrast": "自动对比度", + "invert_colors": "反相", + "convert_to_rgb": "转RGB", + "convert_to_cmyk": "转CMYK", + "add_drop_shadow": "添加投影", + "clear_layer_effects": "清除图层样式", + "bring_to_front": 
"置顶图层", + "send_to_back": "置底图层", + "crop_document": "裁切画布", + "trim_document": "自动裁切", + "add_guide": "添加参考线", + "clear_guides": "清除参考线", + "fill_selection": "填充选区", + "inverse_selection": "反选", + "feather_selection": "羽化选区", + "adjust_levels": "色阶", + "copy_merged_to_new_layer": "复制合并", + "lock_layer": "锁定图层", + "set_foreground_color": "设置前景色", + "save_document_as": "另存为", + "add_layer_mask": "添加蒙版", + "delete_layer_mask": "删除蒙版", + "convert_to_smart_object": "转智能对象", + "adjust_hsl": "色相/饱和度", + "create_text_layer": "创建文字", + "get_active_layer_info": "查看当前图层", + "list_all_layer_names": "列出所有图层", + "duplicate_layer_to_document": "复制到文档", + "switch_document": "切换文档", + "list_documents": "列出文档", + "get_pixel_color": "取色", + "close_document": "关闭文档", } diff --git a/Server/debug_images/1770454866_input_0.jpg b/Server/debug_images/1770454866_input_0.jpg new file mode 100644 index 0000000..4ac3a3d Binary files /dev/null and b/Server/debug_images/1770454866_input_0.jpg differ diff --git a/Server/debug_images/1770454866_input_1.jpg b/Server/debug_images/1770454866_input_1.jpg new file mode 100644 index 0000000..2c386b5 Binary files /dev/null and b/Server/debug_images/1770454866_input_1.jpg differ diff --git a/Server/debug_images/1770454900_output.png b/Server/debug_images/1770454900_output.png new file mode 100644 index 0000000..b376a4b Binary files /dev/null and b/Server/debug_images/1770454900_output.png differ diff --git a/Server/debug_images/1770454986_gemini_output.jpeg b/Server/debug_images/1770454986_gemini_output.jpeg new file mode 100644 index 0000000..27c286c Binary files /dev/null and b/Server/debug_images/1770454986_gemini_output.jpeg differ diff --git a/Server/debug_images/1770455007_crop_M-1.png b/Server/debug_images/1770455007_crop_M-1.png new file mode 100644 index 0000000..18f6ef7 Binary files /dev/null and b/Server/debug_images/1770455007_crop_M-1.png differ diff --git a/Server/debug_images/1770455023_gemini_output.jpeg 
b/Server/debug_images/1770455023_gemini_output.jpeg new file mode 100644 index 0000000..7bc3de1 Binary files /dev/null and b/Server/debug_images/1770455023_gemini_output.jpeg differ diff --git a/Server/debug_images/1770455273_input_0.jpg b/Server/debug_images/1770455273_input_0.jpg new file mode 100644 index 0000000..4ac3a3d Binary files /dev/null and b/Server/debug_images/1770455273_input_0.jpg differ diff --git a/Server/debug_images/1770455273_input_1.jpg b/Server/debug_images/1770455273_input_1.jpg new file mode 100644 index 0000000..2cc2a7e Binary files /dev/null and b/Server/debug_images/1770455273_input_1.jpg differ diff --git a/Server/debug_images/1770455309_output.png b/Server/debug_images/1770455309_output.png new file mode 100644 index 0000000..7d1ddc5 Binary files /dev/null and b/Server/debug_images/1770455309_output.png differ diff --git a/Server/debug_images/1770455392_gemini_output.jpeg b/Server/debug_images/1770455392_gemini_output.jpeg new file mode 100644 index 0000000..98cece4 Binary files /dev/null and b/Server/debug_images/1770455392_gemini_output.jpeg differ diff --git a/Server/debug_images/1770455415_crop_M-1.png b/Server/debug_images/1770455415_crop_M-1.png new file mode 100644 index 0000000..02dedb5 Binary files /dev/null and b/Server/debug_images/1770455415_crop_M-1.png differ diff --git a/Server/debug_images/1770455431_gemini_output.jpeg b/Server/debug_images/1770455431_gemini_output.jpeg new file mode 100644 index 0000000..7808c27 Binary files /dev/null and b/Server/debug_images/1770455431_gemini_output.jpeg differ diff --git a/Server/debug_images/1770455660_gemini_output.jpeg b/Server/debug_images/1770455660_gemini_output.jpeg new file mode 100644 index 0000000..725f572 Binary files /dev/null and b/Server/debug_images/1770455660_gemini_output.jpeg differ diff --git a/Server/debug_images/1770455704_crop_M-1.png b/Server/debug_images/1770455704_crop_M-1.png new file mode 100644 index 0000000..b7768fb Binary files /dev/null and 
b/Server/debug_images/1770455704_crop_M-1.png differ diff --git a/Server/debug_images/1770455721_gemini_output.jpeg b/Server/debug_images/1770455721_gemini_output.jpeg new file mode 100644 index 0000000..9b7fadb Binary files /dev/null and b/Server/debug_images/1770455721_gemini_output.jpeg differ diff --git a/Server/debug_images/1770455846_gemini_output.jpeg b/Server/debug_images/1770455846_gemini_output.jpeg new file mode 100644 index 0000000..9a51374 Binary files /dev/null and b/Server/debug_images/1770455846_gemini_output.jpeg differ diff --git a/Server/debug_images/1770455863_crop_M-1.png b/Server/debug_images/1770455863_crop_M-1.png new file mode 100644 index 0000000..de32a51 Binary files /dev/null and b/Server/debug_images/1770455863_crop_M-1.png differ diff --git a/Server/debug_images/1770455879_gemini_output.jpeg b/Server/debug_images/1770455879_gemini_output.jpeg new file mode 100644 index 0000000..5146b57 Binary files /dev/null and b/Server/debug_images/1770455879_gemini_output.jpeg differ diff --git a/开发准则.md b/开发准则.md index 659a028..c82f89c 100644 --- a/开发准则.md +++ b/开发准则.md @@ -213,18 +213,64 @@ ### 3.4 目录结构建议 (参考) ``` -src/ - ├── assets/ # 静态资源 +Designer/src/ + ├── api/ + │ ├── ai.ts # AI 聊天/图片/模型 API 接口 + │ ├── auth.ts # 认证接口 + │ ├── user.ts # 用户接口 + │ └── jsxApi/inline/ + │ ├── utils.ts # JSX 公共工具库 (evalInlineJSX) + │ ├── pattern-ai.ts # AI 套图 JSX(置入/填色/标签/剪贴蒙版) + │ ├── layer.ts # 图层操作 + │ ├── document.ts # 文档操作 + │ └── ... + ├── view/ + │ ├── AiChat.vue # AI 聊天页面(Gemini 风格 UI) + │ └── ... 
+ ├── utils/ + │ ├── aiToolExecutor.ts # AI 工具执行器(tool_call → JSX 映射) + │ ├── aiCanvasCapture.ts # 画布截图 / 临时文件管理 + │ ├── logger.ts # 日志工具 + │ └── request.ts # Axios 封装 ├── components/ # 通用组件 - ├── view/ # 页面视图 (Pages) ├── router/ # 路由配置 ├── hooks/ # 组合式函数 (Composables) ├── types/ # TypeScript 类型定义 - ├── utils/ # 工具函数 (含 CSInterface 封装) ├── App.vue └── main.ts + +Server/app/api/v1/ + ├── ai_chat.py # AI 聊天路由(对话/会话/图片处理端点) + ├── ai_llm.py # AI 模型调用层(Qwen/Gemini/Vision/Image 统一路由) + ├── ai_tools.py # AI 工具定义(PS 操作的 function calling schema) + ├── auth.py # 认证路由 + └── ... ``` +### 3.5.5 AI 模块开发规范 + +1. **模型调用统一走 `ai_llm.py`**: + - 所有 LLM/Vision/Image 调用逻辑集中在此文件 + - 自动路由 Qwen(DashScope)和 Gemini(第三方代理) + - 禁止在路由端点中直接调用 `OpenAI()` 客户端 + +2. **工具执行器模式**(前端): + - AI 返回 `tool_calls` → 前端 `aiToolExecutor.ts` 执行 → 回传结果给后端 + - 工具函数映射表 `toolHandlers` 统一管理 + - 状态(预览图 URL、裁片分析结果等)存在模块级变量中 + +3. **CEP 中的 IME(中文输入)兼容**: + - ❌ **禁止使用 Arco Design 的 `a-textarea` 做主输入框**(会拦截 IME composition 事件) + - ✅ **使用原生 `