From 2c003e9a7dd155bd7dfb788a349e4d8a9fbc63c4 Mon Sep 17 00:00:00 2001
From: jimi <1847930177@qq.com>
Date: Tue, 10 Mar 2026 15:48:27 +0800
Subject: [PATCH] fix: clean generated tuhui titles

---
Reviewer notes (commentary only; text in this region is not part of the
commit message or of the diff):

* _looks_like_bad_title(text) strips and lower-cases its input and returns
  True when the result is empty; when it contains "http", "www", "alicdn"
  or "imgextra"; when it matches \b(o1cn|jpg|jpeg|png|webp|gif)\b; when it
  has three or more underscores and no run of two characters in
  U+4E00..U+9FA5; or when at least 16 characters remain after removing
  everything except 0-9, a-z and "_" and no character in U+4E00..U+9FA5 is
  present. Otherwise it returns False.

* _pick_clean_title_part(raw) passes raw through _safe_name(raw, "") and
  returns "" if the result is empty or looks bad. Otherwise it splits on
  "_", keeps the parts that are at least two characters long and do not
  look bad, and, if any survive, joins the first three with "_". The
  joined value is checked once more and truncated to 30 characters. When
  no part survives, the _safe_name result is kept unchanged.

* _build_upload_title now calls _pick_clean_title_part instead of
  _safe_name(..., "") for "title_suggest", "subject" and "proc_type".

* NOTE(review): in Python's re module \b is defined in terms of \w, which
  includes digits and "_". The extension/o1cn pattern therefore does not
  match tokens such as "o1cn01..." or "_jpg" inside a longer
  underscore-joined name; such tokens are only caught when a split part is
  exactly one of the listed words, or by the 16-character rule. Confirm
  this is the intended strictness.

 services/service_auto_image_pipeline.py | 35 ++++++++++++++++++++++---
 1 file changed, 32 insertions(+), 3 deletions(-)

diff --git a/services/service_auto_image_pipeline.py b/services/service_auto_image_pipeline.py
index ec7137b..2468b4d 100644
--- a/services/service_auto_image_pipeline.py
+++ b/services/service_auto_image_pipeline.py
@@ -62,6 +62,35 @@ def _safe_name(text: str, fallback: str = "image") -> str:
     return cleaned[:40] or fallback
 
 
+def _looks_like_bad_title(text: str) -> bool:
+    value = str(text or "").strip().lower()
+    if not value:
+        return True
+    if "http" in value or "www" in value or "alicdn" in value or "imgextra" in value:
+        return True
+    if re.search(r"\b(o1cn|jpg|jpeg|png|webp|gif)\b", value):
+        return True
+    if value.count("_") >= 3 and not re.search(r"[\u4e00-\u9fa5]{2,}", value):
+        return True
+    alnum = re.sub(r"[^0-9a-z_]+", "", value)
+    if alnum and len(alnum) >= 16 and not re.search(r"[\u4e00-\u9fa5]", value):
+        return True
+    return False
+
+
+def _pick_clean_title_part(raw: str) -> str:
+    cleaned = _safe_name(raw, "")
+    if not cleaned or _looks_like_bad_title(cleaned):
+        return ""
+    parts = [part for part in cleaned.split("_") if part]
+    meaningful = [part for part in parts if not _looks_like_bad_title(part) and len(part) >= 2]
+    if meaningful:
+        cleaned = "_".join(meaningful[:3])
+    if _looks_like_bad_title(cleaned):
+        return ""
+    return cleaned[:30]
+
+
 def _suffix_from_url(url: str) -> str:
     path = urlparse(str(url or "")).path
     suffix = Path(path).suffix.lower()
@@ -82,11 +111,11 @@ def _build_processing_prompt(intent: str, requirement_text: str, analysis: Dict)
 
 def _build_upload_title(intent: str, analysis: Dict, requirement_text: str, idx: int) -> str:
     analysis = analysis or {}
-    suggested = _safe_name(str(analysis.get("title_suggest") or ""), "")
+    suggested = _pick_clean_title_part(str(analysis.get("title_suggest") or ""))
     if suggested:
         return suggested
-    subject = _safe_name(str(analysis.get("subject") or ""), "")
-    proc_type = _safe_name(str(analysis.get("proc_type") or ""), "")
+    subject = _pick_clean_title_part(str(analysis.get("subject") or ""))
+    proc_type = _pick_clean_title_part(str(analysis.get("proc_type") or ""))
     parts = [part for part in (subject, proc_type) if part]
     if parts: