""" 图片处理结果质检模块 处理完成后,用视觉 AI 对比原图和结果图,判断是否符合客户需求。 评分 0-100,低于阈值则判定不合格,触发重试或人工跟进。 """ import base64 import os import time import asyncio from typing import Optional from dotenv import load_dotenv load_dotenv() _QA_PASS_SCORE = int(os.getenv("QA_PASS_SCORE", "70")) # 合格分数线,默认70 QA_PROMPT_TEMPLATE = """\ 你是一名专业的图片处理质检员,需要评估处理结果是否满足要求。 【处理类型】{proc_type} 【客户需求/Gemini提示词】{gemini_prompt} 【原图描述】主体:{subject},类型:{proc_type},质量:{quality} 请对比左图(原图)和右图(处理结果),从以下维度打分(每项0-25分): 1. 内容完整性:主体图案/内容是否完整保留,有无缺失、截断 2. 畸变去除:褶皱/透视变形/背景是否已被清除 3. 细节还原:颜色、线条、纹理等细节与原图的匹配程度 4. 输出干净度:背景是否干净,有无多余内容、AI幻觉、模糊块 输出格式(严格按照此格式,每行一个字段): 完整性: <0-25> 畸变: <0-25> 细节: <0-25> 干净: <0-25> 总分: <0-100> 结论: 问题: <简述主要问题,不超过30字,无问题填"无"> 建议: <如果fail,给出重试改进建议,不超过40字,pass则填"无"> """ class ImageQA: """处理结果质检器""" def __init__(self): self.api_key = os.getenv("OPENAI_API_KEY") self.base_url = os.getenv("OPENAI_BASE_URL", "https://open.bigmodel.cn/api/paas/v4") self.model = os.getenv("VISION_MODEL", "glm-4v-flash") self.pass_score = _QA_PASS_SCORE def _to_base64(self, path: str) -> Optional[str]: try: with open(path, "rb") as f: return base64.b64encode(f.read()).decode("utf-8") except Exception as e: print(f"[ImageQA] 读取图片失败 {path}: {e}") return None def _parse(self, text: str) -> dict: def p(key): for line in text.splitlines(): line = line.strip() for k in [f"{key}:", f"{key}:"]: if line.startswith(k): return line[len(k):].strip() return "" try: score = int(p("总分")) except ValueError: score = 0 conclusion = p("结论").lower() if conclusion not in ("pass", "fail"): conclusion = "pass" if score >= self.pass_score else "fail" return { "score": score, "pass": conclusion == "pass", "issue": p("问题"), "suggestion": p("建议"), "detail": { "completeness": p("完整性"), "distortion": p("畸变"), "detail": p("细节"), "clean": p("干净"), }, "raw": text, } async def check( self, original_path: str, result_path: str, proc_type: str = "", subject: str = "", quality: str = "", gemini_prompt: str = "", ) -> dict: """ 质检处理结果。 Args: original_path: 原图本地路径 result_path: 处理结果本地路径 proc_type: 处理类型(印花提取 / 高清修复等) subject: 主体描述 quality: 原图质量 gemini_prompt: 传给 Gemini 的提示词(体现客户需求) Returns: { "score": int, # 0-100 "pass": bool, # 是否合格 "issue": str, # 主要问题 "suggestion": str, # 重试改进建议 "detail": dict, # 各维度分数 } """ if not self.api_key: print("[ImageQA] 未配置 API Key,跳过质检,默认通过") return {"score": 80, "pass": True, "issue": "无", "suggestion": "无", "detail": {}} orig_b64 = self._to_base64(original_path) result_b64 = self._to_base64(result_path) if not orig_b64 or not result_b64: print("[ImageQA] 图片读取失败,跳过质检") return {"score": 75, "pass": True, "issue": "质检图片读取失败", "suggestion": "无", "detail": {}} prompt = QA_PROMPT_TEMPLATE.format( proc_type=proc_type or "图片处理", subject=subject or "未知", quality=quality or "未知", gemini_prompt=gemini_prompt or "按标准处理", ) start = time.monotonic() try: from openai import AsyncOpenAI client = AsyncOpenAI(base_url=self.base_url, api_key=self.api_key) response = await client.responses.create( model=self.model, input=[ { "role": "user", "content": [ { "type": "input_image", "image_url": f"data:image/jpeg;base64,{orig_b64}", }, { "type": "input_image", "image_url": f"data:image/jpeg;base64,{result_b64}", }, { "type": "input_text", "text": prompt, }, ], } ], ) content = response.output_text elapsed = time.monotonic() - start result = self._parse(content) result["elapsed"] = round(elapsed, 1) status = "✓ 合格" if result["pass"] else "✗ 不合格" print(f"[ImageQA] {status} | 得分: {result['score']}/100 | 问题: {result['issue']} | 耗时: {elapsed:.1f}s") if not result["pass"]: print(f"[ImageQA] 改进建议: {result['suggestion']}") try: from utils.api_cost_tracker import record record("gemini_vision", count=1) except Exception: pass return result except Exception as e: elapsed = time.monotonic() - start print(f"[ImageQA] 质检失败 ({elapsed:.1f}s): {e}") return {"score": 75, "pass": True, "issue": f"质检异常: {e}", "suggestion": "无", "detail": {}} # 全局实例 image_qa = ImageQA()