fix: clean generated tuhui titles

This commit is contained in:
2026-03-10 15:48:27 +08:00
parent 3f45a4badd
commit 2c003e9a7d

View File

@@ -62,6 +62,35 @@ def _safe_name(text: str, fallback: str = "image") -> str:
return cleaned[:40] or fallback return cleaned[:40] or fallback
def _looks_like_bad_title(text: str) -> bool:
value = str(text or "").strip().lower()
if not value:
return True
if "http" in value or "www" in value or "alicdn" in value or "imgextra" in value:
return True
if re.search(r"\b(o1cn|jpg|jpeg|png|webp|gif)\b", value):
return True
if value.count("_") >= 3 and not re.search(r"[\u4e00-\u9fa5]{2,}", value):
return True
alnum = re.sub(r"[^0-9a-z_]+", "", value)
if alnum and len(alnum) >= 16 and not re.search(r"[\u4e00-\u9fa5]", value):
return True
return False
def _pick_clean_title_part(raw: str) -> str:
    """Return a cleaned title fragment built from *raw*, or "" if none is usable.

    *raw* is first passed through ``_safe_name`` (with an empty fallback).
    The result is split on "_" and every piece that
    ``_looks_like_bad_title`` rejects is discarded. Up to three of the
    remaining pieces that are at least two characters long are re-joined
    with "_". The final string is checked once more and truncated to 30
    characters.

    Args:
        raw: Candidate title text (for example a suggested title or subject).

    Returns:
        The cleaned fragment, at most 30 characters, or "" when the input is
        empty or only rejected pieces remain.
    """
    cleaned = _safe_name(raw, "")
    if not cleaned or _looks_like_bad_title(cleaned):
        return ""

    parts = [part for part in cleaned.split("_") if part]
    acceptable = [part for part in parts if not _looks_like_bad_title(part)]
    meaningful = [part for part in acceptable if len(part) >= 2]

    if meaningful:
        cleaned = "_".join(meaningful[:3])
    elif len(acceptable) < len(parts):
        # Nothing is long enough to count as meaningful, yet some pieces were
        # rejected. Keeping the unfiltered string would leak those pieces
        # (e.g. "jpg_png" passes the whole-string check because "_" is a word
        # character), so keep only the accepted short pieces. If none are
        # left the join is "" and the final check below returns "".
        cleaned = "_".join(acceptable)

    if _looks_like_bad_title(cleaned):
        return ""
    return cleaned[:30]
def _suffix_from_url(url: str) -> str: def _suffix_from_url(url: str) -> str:
path = urlparse(str(url or "")).path path = urlparse(str(url or "")).path
suffix = Path(path).suffix.lower() suffix = Path(path).suffix.lower()
@@ -82,11 +111,11 @@ def _build_processing_prompt(intent: str, requirement_text: str, analysis: Dict)
def _build_upload_title(intent: str, analysis: Dict, requirement_text: str, idx: int) -> str: def _build_upload_title(intent: str, analysis: Dict, requirement_text: str, idx: int) -> str:
analysis = analysis or {} analysis = analysis or {}
suggested = _safe_name(str(analysis.get("title_suggest") or ""), "") suggested = _pick_clean_title_part(str(analysis.get("title_suggest") or ""))
if suggested: if suggested:
return suggested return suggested
subject = _safe_name(str(analysis.get("subject") or ""), "") subject = _pick_clean_title_part(str(analysis.get("subject") or ""))
proc_type = _safe_name(str(analysis.get("proc_type") or ""), "") proc_type = _pick_clean_title_part(str(analysis.get("proc_type") or ""))
parts = [part for part in (subject, proc_type) if part] parts = [part for part in (subject, proc_type) if part]
if parts: if parts: