# File: DP/Server/app/api/v1/ai_skills.py (637 lines, 22 KiB, Python)
# Note: the repository viewer flagged this file as containing ambiguous Unicode
# characters, i.e. characters that might be confused with other characters.
# If that is intentional the warning can be ignored.
"""
AI 技能注册表
为 AI 助手提供可复用的任务模式、工作流和工具边界。
"""
from dataclasses import dataclass
from typing import Iterable, Optional
@dataclass(frozen=True)
class AiSkill:
    """Immutable description of a single assistant skill.

    The dataclass is frozen, so instances cannot be modified after
    construction. The first ten fields are required; the remaining ones
    default to an empty string or an empty tuple.
    """

    # Identity and presentation.
    id: str
    name: str
    description: str
    mode: str

    # Matching and execution policy.
    keywords: tuple[str, ...]
    allowed_tools: tuple[str, ...]
    workflow: tuple[str, ...]
    guardrails: tuple[str, ...]
    success_criteria: tuple[str, ...]
    planning_style: str

    # Optional metadata.
    image_hint: str = ""
    origin: str = ""
    origin_url: str = ""
    upstream_skill: str = ""
    triage_questions: tuple[str, ...] = ()
    deliverables: tuple[str, ...] = ()
    execution_notes: tuple[str, ...] = ()

    def to_public_dict(self) -> dict:
        """Return the presentation subset of this skill as a plain dict.

        Only the identity, mode, planning style, image hint and origin
        fields are included; keywords, tools, workflow and the other
        tuple-valued fields are left out.
        """
        exposed = (
            "id",
            "name",
            "description",
            "mode",
            "planning_style",
            "image_hint",
            "origin",
            "origin_url",
            "upstream_skill",
        )
        return {field_name: getattr(self, field_name) for field_name in exposed}
# Public descriptor of the "auto" pseudo-skill. It carries the same keys that
# AiSkill.to_public_dict() produces, but it is a plain dict, not an AiSkill.
AUTO_SKILL = dict(
    id="auto",
    name="智能自动选择",
    description="按当前问题、图片和 Photoshop 上下文,自动切换最合适的技能。",
    mode="auto",
    planning_style="先判断任务类型,再切到最匹配的专家技能。",
    image_hint="适合不知道该选哪个技能时使用。",
    origin="内置技能路由",
    origin_url="",
    upstream_skill="",
)
# Upstream OpenClaw skills repository, plus the folders of the individual
# upstream skills referenced by the registry entries.
OPENCLAW_REPO_URL = "https://github.com/openclaw/skills"
_OPENCLAW_SKILLS_TREE = f"{OPENCLAW_REPO_URL}/tree/main/skills"
OPENCLAW_PS_AUTOMATOR_URL = f"{_OPENCLAW_SKILLS_TREE}/abdul-karim-mia/photoshop-automator"
OPENCLAW_UI_UX_PRO_URL = f"{_OPENCLAW_SKILLS_TREE}/15349185792/ui-ux-pro-max-0-1-0"
OPENCLAW_UI_DESIGNER_URL = f"{_OPENCLAW_SKILLS_TREE}/1999azzar/ui-designer-skill"
# Built-in skill registry. The order is significant: skills are listed to
# clients in this order, and when two skills reach the same routing score the
# one that appears first here is kept.
SKILLS: tuple[AiSkill, ...] = (
    # General Photoshop operations; also the router's starting default.
    AiSkill(
        id="ps-generalist",
        name="PS 全能助手",
        description="处理一般 Photoshop 操作、图层整理、文档检查、稳定执行和轻量自动化任务。",
        mode="tool_agent",
        keywords=(
            "ps", "photoshop", "图层", "文档", "画布", "对齐", "移动", "旋转", "缩放",
            "文字", "保存", "导出", "出图", "效果图",
            "不会", "怎么做", "怎么操作", "如何操作", "教我",
            "报错", "错误", "失败", "没反应", "不能", "无法", "为什么",
        ),
        allowed_tools=(
            "get_document_info", "get_layer_structure", "get_active_layer_info",
            "list_all_layer_names", "list_documents", "switch_document",
            "create_layer", "rename_layer", "delete_layer", "duplicate_layer",
            "create_layer_group", "move_layer_to_group", "group_layers",
            "move_layer", "resize_layer", "rotate_layer", "flip_layer",
            "align_layers", "align_layers_tool", "distribute_layers",
            "set_layer_visible", "set_layer_opacity", "set_layer_blend_mode",
            "bring_to_front", "send_to_back", "resize_canvas",
            "create_text_layer", "set_text_content", "set_text_color", "set_text_size",
            "generate_design_images",
            "save_document", "save_document_as", "undo", "close_document",
        ),
        workflow=(
            "先确认当前活动文档、目标图层和任务边界,再决定操作路径。",
            "优先用最少的步骤完成任务,不做多余改动,能查就先查。",
            "执行后总结结果,并明确告诉用户下一步还能继续做什么。",
        ),
        guardrails=(
            "涉及删除、关闭、覆盖保存时先征求确认。",
            "不清楚目标图层时先查询,不要盲改。",
            "能在新图层或新图层组完成的改动,不直接破坏原图层。",
            "Photoshop 若有模态弹窗或保存窗口,先提醒用户关闭,再继续工具执行。",
            "按图层名操作时要求精确匹配,不要自己猜测最像的图层。",
        ),
        success_criteria=("操作路径清晰", "图层关系不混乱", "文档仍然可继续编辑"),
        planning_style="先查活动文档和目标图层,再决定是查询、轻改还是执行一串可回退的 PS 操作。",
        image_hint="适合边聊边操作 Photoshop而不是深度看图分析。",
        origin="改造自 OpenClaw 的 photoshop-automator",
        origin_url=OPENCLAW_PS_AUTOMATOR_URL,
        upstream_skill="openclaw/photoshop-automator",
        execution_notes=(
            "默认围绕当前活动文档工作;没有文档时先提示用户打开文件。",
            "执行链路尽量短、原子化,避免一次堆很多风险操作。",
            "遇到文本替换、图层修改这类任务时,先确认名称和对象,再执行。",
            "如果用户是在问不会怎么做、为什么失败或下一步怎么操作,先读取当前上下文再答疑,必要时同轮直接执行。",
        ),
    ),
    # Garment pieces, prints and fabric mapping.
    AiSkill(
        id="garment-pattern-mapper",
        name="服装套图师",
        description="处理服装成衣、裁片、花型、面料和平铺套图任务。",
        mode="tool_agent",
        keywords=(
            "套图", "裁片", "成衣", "服装", "花型", "花样", "印花", "面料",
            "前片", "后片", "袖子", "pattern", "garment",
        ),
        allowed_tools=(
            "get_document_info", "get_layer_structure", "list_all_layer_names",
            "identify_pieces", "generate_garment_preview",
            "extract_and_apply_all_pieces", "verify_pattern_result",
        ),
        workflow=(
            "先判断用户是要识别、预览还是正式套图。",
            "需要套图时,先识别裁片和花型模式,再生成预览或直接走 all_over 流程。",
            "正式套图后用验证结果总结问题和改进建议。",
        ),
        guardrails=(
            "生成预览后,除非用户明确确认,否则不要直接进入正式套图。",
            "识别结果不确定时要先说明,再继续下一步。",
            "没有参考图或没有有效裁片上下文时,不要假装已经完成套图。",
        ),
        success_criteria=("裁片识别合理", "花型方向和比例尽量接近原图", "流程状态对用户透明"),
        planning_style="把任务拆成 识别 -> 预览 -> 确认 -> 正式套图 -> 验证 五段式流程。",
        image_hint="适合用户上传成衣图、要求识别裁片、生成预览或正式套图时使用。",
    ),
    # Poster / detail-page layout and typography.
    AiSkill(
        id="ps-layout-designer",
        name="PS 排版设计师",
        description="处理海报、详情页、主视觉、标题层级、品牌感和版式整理任务。",
        mode="tool_agent",
        keywords=(
            "海报", "排版", "版式", "标题", "字体", "封面", "主视觉", "详情页", "banner",
            "构图", "留白", "层级", "品牌感", "视觉方向", "卖点",
            "背景图", "效果图", "方案图",
        ),
        allowed_tools=(
            "get_document_info", "get_layer_structure", "list_all_layer_names",
            "get_active_layer_info",
            "create_text_layer", "set_text_content", "set_text_color", "set_text_size",
            "create_layer_group", "group_layers", "move_layer", "resize_layer",
            "align_layers_tool", "distribute_layers", "bring_to_front", "send_to_back",
            "set_layer_opacity", "set_layer_blend_mode",
            "add_stroke", "add_drop_shadow", "generate_design_images",
        ),
        workflow=(
            "先分析画布比例、已有图层、信息密度和视觉重心。",
            "先给出版式思路,再搭主标题、辅文、卖点和装饰层。",
            "最后统一对齐、层级、间距、可读性和品牌感。",
        ),
        guardrails=(
            "涉及大改版式时,优先新建组或新图层,而不是覆盖原设计。",
            "没有足够上下文时,先做查询或提出一版轻量方案。",
            "不要一次性做太多不可逆操作。",
        ),
        success_criteria=("视觉层级清楚", "对齐和间距统一", "画面有主次和留白"),
        planning_style="先做版式诊断和信息架构,再分步搭建图层,不要一上来就乱动。",
        image_hint="适合海报排版、标题优化、版式调整和页面重构。",
        origin="改造自 OpenClaw 的 ui-ux-pro-max / ui-designer-skill",
        origin_url=OPENCLAW_UI_UX_PRO_URL,
        upstream_skill="openclaw/ui-ux-pro-max + ui-designer-skill",
        triage_questions=(
            "当前作品更偏海报、封面、电商主图还是详情页?",
            "这次要优先提升点击率、品牌感、层级清晰度还是信息转化?",
            "有没有必须保留的品牌色、主标题、卖点或参考风格?",
        ),
        deliverables=(
            "一句话视觉方向",
            "标题/副标题/卖点的层级方案",
            "对齐、留白、构图和配色建议",
            "对应到 Photoshop 的图层调整动作",
        ),
        execution_notes=(
            "先拆信息优先级,再决定字体大小、位置和装饰强度。",
            "尽量在新组内搭结构,保留原稿便于回退和对比。",
            "如果上下文不足,先给一版轻量方向,再等用户确认后深化。",
        ),
    ),
    # Creative direction: references, tone, palette and composition.
    AiSkill(
        id="creative-direction-strategist",
        name="创意方向设计师",
        description="处理参考图拆解、品牌调性、视觉方向、配色与构图方案,把灵感转成可执行设计路线。",
        mode="hybrid_chat",
        keywords=(
            "创意", "方向", "灵感", "调性", "参考图", "品牌感", "构图", "配色", "氛围",
            "风格方案", "视觉方向", "art direction", "moodboard",
            "方向图", "概念图", "效果图", "出图", "方案图",
            "插画", "画面", "方向稿", "素材图",
        ),
        allowed_tools=(
            "get_document_info", "get_layer_structure", "list_all_layer_names",
            "create_layer_group", "create_text_layer", "align_layers_tool",
            "add_guide", "generate_design_images",
        ),
        workflow=(
            "先理解任务目标和参考图里最有价值的视觉线索。",
            "再给出 2 到 3 条创意方向,说明各自的调性、构图和适用场景。",
            "最后把选中的方向转成 Photoshop 可执行的排版、图层和颜色建议。",
        ),
        guardrails=(
            "不要只给抽象评价,要把风格结论落到可执行动作上。",
            "没有足够上下文时,不要假设品牌调性和目标人群。",
            "除非用户要求,不直接大改现有图层结构。",
        ),
        success_criteria=("风格判断可信", "方向方案有区分度", "建议能直接落回 Photoshop"),
        planning_style="先做视觉诊断,再给 2 到 3 条方向,再落成可执行的排版和图层建议。",
        image_hint="适合上传参考图后,让 AI 像设计师一样拆方向、配色、构图和执行路径。",
        origin="改造自 OpenClaw 的 ui-ux-pro-max / ui-designer-skill",
        origin_url=OPENCLAW_REPO_URL,
        upstream_skill="openclaw/ui-ux-pro-max + ui-designer-skill",
        triage_questions=(
            "这次更偏品牌升级、活动海报、电商转化还是氛围图?",
            "想保留参考图里的哪些东西:配色、构图、质感还是情绪?",
            "输出要更稳重、高级、年轻还是更强转化?",
        ),
        deliverables=(
            "2 到 3 条创意方向",
            "主视觉、标题、辅助元素的布局思路",
            "配色、字体和材质建议",
            "进入 Photoshop 后的第一轮动作清单",
        ),
        execution_notes=(
            "有参考图时先拆视觉语言,再给风格方向,不要直接开始操作。",
            "没有图片时也要先问清目标和场景,再给方案。",
            "当用户确认方向后,再切到排版或修图类技能深入执行。",
        ),
    ),
    # Retouching: cut-outs, color correction and product shots.
    AiSkill(
        id="product-retoucher",
        name="修图精修师",
        description="处理抠图、调色、去背、产品主图优化、亮度对比和图层修整任务。",
        mode="tool_agent",
        keywords=(
            "修图", "精修", "抠图", "去背", "调色", "亮度", "对比度", "磨皮",
            "主图", "高光", "阴影", "retouch",
        ),
        allowed_tools=(
            "get_document_info", "get_layer_structure", "get_active_layer_info",
            "duplicate_layer", "create_layer", "rasterize_layer",
            "adjust_brightness_contrast", "adjust_levels", "adjust_hsl",
            "auto_levels", "auto_contrast", "desaturate", "gaussian_blur",
            "create_clipping_mask", "release_clipping_mask",
            "add_layer_mask", "delete_layer_mask",
            "fill_selection", "inverse_selection", "feather_selection",
            "copy_merged_to_new_layer",
            "set_layer_blend_mode", "set_layer_opacity",
            "save_document", "save_document_as",
        ),
        workflow=(
            "先判断任务属于结构修图、颜色修正还是产品精修。",
            "优先做可回退的副本或蒙版,再进行调整。",
            "最终说明改动方向和还可以继续优化的点。",
        ),
        guardrails=(
            "不要直接破坏唯一原图层。",
            "没有明确选区或图层时先查询上下文。",
            "涉及批量覆盖时先提醒用户风险。",
        ),
        success_criteria=("改动可回退", "主体更清晰", "颜色和层次更稳定"),
        planning_style="优先非破坏性修图:复制、蒙版、调整,再视情况合并。",
        image_hint="适合产品主图、商品精修、调色和去背类任务。",
    ),
    # Image analysis only: the sole vision_chat skill, with no tools allowed.
    AiSkill(
        id="visual-analysis-advisor",
        name="看图分析师",
        description="处理上传图片后的版型、风格、配色、元素拆解、设计建议和参考分析。",
        mode="vision_chat",
        keywords=(
            "看图", "分析", "你看见", "这张图", "参考图", "风格", "版型", "配色",
            "元素", "灵感", "建议", "评价",
        ),
        allowed_tools=(),
        workflow=(
            "先解释看到了什么,再拆解风格、结构和重点元素。",
            "如果用户有执行目标,再把分析转成可落地的 Photoshop 操作建议。",
        ),
        guardrails=(
            "不要虚构已经执行过 Photoshop 操作。",
            "分析不确定时用概率表达,不要装作绝对正确。",
        ),
        success_criteria=("描述准确", "建议可执行", "重点突出"),
        planning_style="先看图,再给结论,最后给下一步操作建议。",
        image_hint="适合用户上传图片后问“你看见了什么”“这个版型/风格怎么做”。",
        origin="改造自 OpenClaw 的 ui-designer-skill",
        origin_url=OPENCLAW_UI_DESIGNER_URL,
        upstream_skill="openclaw/ui-designer-skill",
        deliverables=(
            "图片内容描述",
            "风格、构图、配色和元素拆解",
            "可以继续在 Photoshop 落地的建议",
        ),
    ),
)
# Index of the registry by skill id, built once at import time.
SKILL_BY_ID: dict[str, AiSkill] = {registered.id: registered for registered in SKILLS}
def list_ai_skills_public() -> list[dict]:
    """Return the public descriptors of every selectable skill.

    The first entry describes the "auto" pseudo-skill; the remaining
    entries are the public dicts of the registered skills, in registry
    order.

    Returns:
        A new list of new dicts. The "auto" entry is a shallow copy of
        ``AUTO_SKILL`` so that a caller who edits the result cannot alter
        the shared module-level constant.
    """
    public_entries = [dict(AUTO_SKILL)]
    public_entries.extend(skill.to_public_dict() for skill in SKILLS)
    return public_entries
def get_ai_skill(skill_id: Optional[str]) -> Optional[AiSkill]:
    """Look up a registered skill by id.

    Args:
        skill_id: Requested skill id; may be ``None`` or empty.

    Returns:
        The matching skill, or ``None`` when the id is empty, is the
        ``"auto"`` pseudo-skill, or is not present in ``SKILL_BY_ID``.
    """
    if skill_id and skill_id != "auto":
        return SKILL_BY_ID.get(skill_id)
    return None
def _history_text(history_messages: Iterable[dict]) -> str:
    """Join the most recent non-empty message contents into one string.

    Args:
        history_messages: Message dicts; only the ``"content"`` key is read.

    Returns:
        The stripped contents of the last six messages that have
        non-blank content, separated by newlines. Empty string when there
        are none.
    """
    contents: list[str] = []
    for item in history_messages:
        raw = item.get("content")
        if raw is None:
            # A missing key and an explicit ``None`` are treated alike.
            # Stringifying ``None`` would put the literal "None" into the
            # text and waste one of the six history slots.
            continue
        text = str(raw).strip()
        if text:
            contents.append(text)
    return "\n".join(contents[-6:])
# Phrases that strongly point at one specific skill: skill id -> (bonus, phrases).
# The bonus is granted once if any phrase occurs in the text.
_SKILL_TRIGGER_BONUSES: dict[str, tuple[int, tuple[str, ...]]] = {
    "garment-pattern-mapper": (
        6,
        ("套图", "裁片", "成衣", "花型", "面料", "pattern"),
    ),
    "ps-layout-designer": (
        5,
        (
            "排版", "海报", "标题", "版式", "封面", "banner",
            "品牌感", "卖点", "背景图", "方案图",
        ),
    ),
    "creative-direction-strategist": (
        6,
        (
            "创意", "方向", "灵感", "调性", "参考图", "品牌感", "构图", "配色", "氛围",
            "风格方案", "方向图", "效果图", "概念图", "方案图", "背景图", "出图",
            "生成一张", "生成四张", "生成一组", "插画", "画面", "方向稿", "连贯",
        ),
    ),
    "product-retoucher": (
        5,
        ("修图", "去背", "抠图", "调色", "精修", "主图"),
    ),
    "ps-generalist": (
        7,
        (
            "不会", "怎么做", "怎么操作", "如何操作", "教我", "帮我操作", "帮我弄",
            "为什么", "报错", "错误", "失败", "没反应", "不能", "无法", "下一步", "该怎么",
        ),
    ),
}

# Analysis wording that boosts the visual analysis skill when an image is
# attached. The original tuple also contained an empty string, which is a
# substring of every text and therefore made the boost unconditional.
_VISUAL_ANALYSIS_TOKENS = ("分析", "风格", "版型", "你看见", "建议", "元素")

# Phrases asking the assistant to act directly; they favor tool_agent skills.
_DELEGATION_TOKENS = ("帮我做", "直接做", "你来做", "顺手做", "帮我操作")


def _mentions_any(lowered_text: str, tokens: tuple[str, ...]) -> bool:
    """Return True if any non-empty token occurs in the lower-cased text."""
    # Empty tokens are skipped: "" is contained in every string.
    return any(token and token in lowered_text for token in tokens)


def _score_skill(skill: AiSkill, text: str, has_image: bool) -> int:
    """Score how well a skill matches the conversation text.

    Args:
        skill: Candidate skill; its ``id``, ``mode`` and ``keywords`` are read.
        text: Conversation text to match against (case-insensitive).
        has_image: Whether the request carries an image.

    Returns:
        A non-negative integer; higher means a better match. The score is
        the sum of:
          * 3 per skill keyword found in the text,
          * image bonuses (2 for ``vision_chat`` skills, 1 for the garment
            skill, 2 for the creative direction skill),
          * the per-skill trigger bonus when one of its phrases occurs,
          * 6 for the visual analysis skill when an image is attached and
            analysis wording occurs,
          * 4 for ``tool_agent`` skills when the user asks for direct action.
    """
    lowered = text.lower()

    keyword_hits = sum(
        1 for keyword in skill.keywords if keyword and keyword.lower() in lowered
    )
    score = 3 * keyword_hits

    if has_image:
        if skill.mode == "vision_chat":
            score += 2
        if skill.id == "garment-pattern-mapper":
            score += 1
        if skill.id == "creative-direction-strategist":
            score += 2
        if skill.id == "visual-analysis-advisor" and _mentions_any(
            lowered, _VISUAL_ANALYSIS_TOKENS
        ):
            score += 6

    bonus, triggers = _SKILL_TRIGGER_BONUSES.get(skill.id, (0, ()))
    if _mentions_any(lowered, triggers):
        score += bonus

    if skill.mode == "tool_agent" and _mentions_any(lowered, _DELEGATION_TOKENS):
        score += 4

    return score
def resolve_ai_skill(
    message: str,
    history_messages: Optional[Iterable[dict]] = None,
    requested_skill_id: Optional[str] = None,
    has_image: bool = False,
) -> AiSkill:
    """Pick the skill that should handle the current request.

    Args:
        message: The current user message; ``None`` or empty is tolerated.
        history_messages: Earlier messages whose recent contents are
            scored together with ``message``.
        requested_skill_id: Skill explicitly chosen by the caller. When it
            resolves to a registered skill, that skill is returned without
            any scoring.
        has_image: Whether the request carries an image.

    Returns:
        The explicitly requested skill if it is valid; otherwise the
        highest-scoring registered skill (the earliest one on ties). When
        an image is attached and the text adds nothing to the winner's
        score, the visual analysis skill is returned instead.
    """
    explicit_skill = get_ai_skill(requested_skill_id)
    if explicit_skill is not None:
        return explicit_skill

    combined_text = f"{_history_text(history_messages or [])}\n{message or ''}".strip()

    best_skill = SKILL_BY_ID["ps-generalist"]
    best_score = -1
    for skill in SKILLS:
        score = _score_skill(skill, combined_text, has_image)
        # Strict comparison keeps the earliest skill when scores tie.
        if score > best_score:
            best_skill, best_score = skill, score

    # Image fallback. The old check `best_score <= 1` could never fire,
    # because a vision_chat skill always scores at least 2 with an image.
    # Compare against what the winner scores on empty text instead: if the
    # text added nothing on top of that, hand the image to visual analysis.
    if has_image and best_score <= _score_skill(best_skill, "", has_image):
        return SKILL_BY_ID["visual-analysis-advisor"]
    return best_skill