Files
tw/image/image_qa.py
2026-02-27 16:03:04 +08:00

190 lines
6.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
图片处理结果质检模块
处理完成后,用视觉 AI 对比原图和结果图,判断是否符合客户需求。
评分 0-100低于阈值则判定不合格触发重试或人工跟进。
"""
import asyncio
import base64
import mimetypes
import os
import re
import time
from typing import Optional

from dotenv import load_dotenv
load_dotenv()
# Passing score threshold, read from the QA_PASS_SCORE environment variable.
# ImageQA copies this value into its ``pass_score`` attribute.
_QA_PASS_SCORE = int(os.getenv("QA_PASS_SCORE", "70")) # pass threshold, defaults to 70
# Prompt sent to the vision model together with the two images.
# Placeholders filled via str.format: {proc_type}, {gemini_prompt}, {subject},
# {quality}. The prompt asks for a reply with one "key: value" field per line;
# ImageQA._parse reads those field names, so keep the two in sync.
QA_PROMPT_TEMPLATE = """\
你是一名专业的图片处理质检员,需要评估处理结果是否满足要求。
【处理类型】{proc_type}
【客户需求/Gemini提示词】{gemini_prompt}
【原图描述】主体:{subject},类型:{proc_type},质量:{quality}
请对比左图原图和右图处理结果从以下维度打分每项0-25分
1. 内容完整性:主体图案/内容是否完整保留,有无缺失、截断
2. 畸变去除:褶皱/透视变形/背景是否已被清除
3. 细节还原:颜色、线条、纹理等细节与原图的匹配程度
4. 输出干净度背景是否干净有无多余内容、AI幻觉、模糊块
输出格式(严格按照此格式,每行一个字段):
完整性: <0-25>
畸变: <0-25>
细节: <0-25>
干净: <0-25>
总分: <0-100>
结论: <pass|fail>
问题: <简述主要问题不超过30字无问题填"">
建议: <如果fail给出重试改进建议不超过40字pass则填"">
"""
class ImageQA:
    """Quality checker for image-processing results.

    Sends the original image and the processed image to a vision model through
    an OpenAI-compatible async client, then parses the model's line-per-field
    reply into a score / verdict dictionary.
    """

    # First (optionally signed) integer inside a field value, e.g. "85/100".
    _INT_RE = re.compile(r"-?\d+")
    # Field separators: ASCII colon and full-width colon (U+FF1A). Models
    # replying in Chinese frequently emit the full-width form.
    _SEPARATORS = (":", "\uff1a")
    # Leading markdown decoration (bullets, emphasis, headings, quotes) that
    # may precede a field name in the model reply.
    _DECORATION = " \t*-#>"

    def __init__(self):
        """Read API credentials, endpoint, model name and pass threshold."""
        self.api_key = os.getenv("OPENAI_API_KEY")
        self.base_url = os.getenv("OPENAI_BASE_URL", "https://open.bigmodel.cn/api/paas/v4")
        self.model = os.getenv("VISION_MODEL", "glm-4v-flash")
        self.pass_score = _QA_PASS_SCORE

    def _to_base64(self, path: str) -> Optional[str]:
        """Return the file at *path* base64-encoded, or None if it cannot be read."""
        try:
            with open(path, "rb") as f:
                return base64.b64encode(f.read()).decode("utf-8")
        except Exception as e:
            # Best effort: the caller treats None as "skip QA".
            print(f"[ImageQA] 读取图片失败 {path}: {e}")
            return None

    @staticmethod
    def _mime_type(path: str) -> str:
        """Guess the image MIME type from the file name, defaulting to JPEG."""
        guessed, _ = mimetypes.guess_type(path)
        if guessed and guessed.startswith("image/"):
            return guessed
        return "image/jpeg"

    def _field(self, text: str, key: str) -> str:
        """Return the value of the reply line named *key*, or "" if absent.

        Lines of the form ``key<separator>value`` (ASCII or full-width colon,
        optionally wrapped in markdown emphasis) take priority. If none is
        found, the first line that merely starts with *key* is used and the
        remainder of that line is returned.
        """
        lines = [raw.strip().lstrip(self._DECORATION) for raw in text.splitlines()]

        # Pass 1: a proper "key: value" line anywhere in the reply.
        for line in lines:
            if not line.startswith(key):
                continue
            rest = line[len(key):].lstrip(" \t*")
            if rest.startswith(self._SEPARATORS):
                return rest[1:].strip().strip("*").strip()

        # Pass 2: tolerate a missing separator ("总分 85").
        for line in lines:
            if line.startswith(key):
                return line[len(key):].strip()
        return ""

    @classmethod
    def _to_score(cls, value: str) -> int:
        """Extract the first integer from *value*, clamped to 0-100 (0 if none)."""
        match = cls._INT_RE.search(value)
        if match is None:
            return 0
        return max(0, min(100, int(match.group())))

    def _parse(self, text: str) -> dict:
        """Parse the model reply into the QA result dictionary.

        Args:
            text: Raw reply text, expected to contain one field per line.

        Returns:
            A dict with keys ``score`` (int, 0-100), ``pass`` (bool),
            ``issue`` (str), ``suggestion`` (str), ``detail`` (dict of the
            four per-dimension values as raw strings) and ``raw`` (the
            unmodified reply text).
        """
        score = self._to_score(self._field(text, "总分"))

        verdict = self._field(text, "结论").lower()
        says_pass = "pass" in verdict
        says_fail = "fail" in verdict
        if says_pass != says_fail:
            # Exactly one of the two words is present: trust the model.
            passed = says_pass
        else:
            # Missing or ambiguous verdict (e.g. the "<pass|fail>" placeholder
            # echoed back): decide from the score threshold instead.
            passed = score >= self.pass_score

        return {
            "score": score,
            "pass": passed,
            "issue": self._field(text, "问题"),
            "suggestion": self._field(text, "建议"),
            "detail": {
                "completeness": self._field(text, "完整性"),
                "distortion": self._field(text, "畸变"),
                "detail": self._field(text, "细节"),
                "clean": self._field(text, "干净"),
            },
            "raw": text,
        }

    async def check(
        self,
        original_path: str,
        result_path: str,
        proc_type: str = "",
        subject: str = "",
        quality: str = "",
        gemini_prompt: str = "",
    ) -> dict:
        """Run QA on a processing result.

        Args:
            original_path: Local path of the original image.
            result_path: Local path of the processed image.
            proc_type: Processing type label inserted into the prompt.
            subject: Description of the main subject.
            quality: Quality description of the original image.
            gemini_prompt: Prompt that was given to the image processor; it
                represents the customer's requirement.

        Returns:
            A dict with ``score`` (int, 0-100), ``pass`` (bool), ``issue``
            (str), ``suggestion`` (str) and ``detail`` (dict). On a completed
            model call it also carries ``raw`` (reply text) and ``elapsed``
            (seconds, rounded to 0.1). When QA cannot run (no API key,
            unreadable image, or any error during the call) a default
            *passing* result is returned so that QA never blocks the pipeline.
        """
        if not self.api_key:
            print("[ImageQA] 未配置 API Key跳过质检默认通过")
            return {"score": 80, "pass": True, "issue": "", "suggestion": "", "detail": {}}

        orig_b64 = self._to_base64(original_path)
        result_b64 = self._to_base64(result_path)
        if not orig_b64 or not result_b64:
            print("[ImageQA] 图片读取失败,跳过质检")
            return {"score": 75, "pass": True, "issue": "质检图片读取失败", "suggestion": "", "detail": {}}

        prompt = QA_PROMPT_TEMPLATE.format(
            proc_type=proc_type or "图片处理",
            subject=subject or "未知",
            quality=quality or "未知",
            gemini_prompt=gemini_prompt or "按标准处理",
        )
        # Label each data URL with the file's own image type, not always JPEG.
        orig_url = f"data:{self._mime_type(original_path)};base64,{orig_b64}"
        result_url = f"data:{self._mime_type(result_path)};base64,{result_b64}"

        start = time.monotonic()
        try:
            # Imported lazily so the module can be imported without the SDK.
            from openai import AsyncOpenAI

            client = AsyncOpenAI(base_url=self.base_url, api_key=self.api_key)
            # NOTE(review): this uses the Responses API; confirm that the
            # configured base_url actually serves it.
            response = await client.responses.create(
                model=self.model,
                input=[
                    {
                        "role": "user",
                        "content": [
                            {"type": "input_image", "image_url": orig_url},
                            {"type": "input_image", "image_url": result_url},
                            {"type": "input_text", "text": prompt},
                        ],
                    }
                ],
            )
            content = response.output_text
            elapsed = time.monotonic() - start

            result = self._parse(content)
            result["elapsed"] = round(elapsed, 1)

            status = "✓ 合格" if result["pass"] else "✗ 不合格"
            print(f"[ImageQA] {status} | 得分: {result['score']}/100 | 问题: {result['issue']} | 耗时: {elapsed:.1f}s")
            if not result["pass"]:
                print(f"[ImageQA] 改进建议: {result['suggestion']}")

            # Cost tracking is optional and must never affect the QA result.
            try:
                from utils.api_cost_tracker import record
                record("gemini_vision", count=1)
            except Exception:
                pass
            return result
        except Exception as e:
            # Any failure (SDK missing, network, malformed reply) degrades to
            # a default pass; the error text is surfaced in ``issue``.
            elapsed = time.monotonic() - start
            print(f"[ImageQA] 质检失败 ({elapsed:.1f}s): {e}")
            return {"score": 75, "pass": True, "issue": f"质检异常: {e}", "suggestion": "", "detail": {}}
# Module-level instance, constructed at import time.
image_qa = ImageQA()