fix: clean generated tuhui titles
This commit is contained in:
@@ -62,6 +62,35 @@ def _safe_name(text: str, fallback: str = "image") -> str:
|
||||
return cleaned[:40] or fallback
|
||||
|
||||
|
||||
def _looks_like_bad_title(text: str) -> bool:
|
||||
value = str(text or "").strip().lower()
|
||||
if not value:
|
||||
return True
|
||||
if "http" in value or "www" in value or "alicdn" in value or "imgextra" in value:
|
||||
return True
|
||||
if re.search(r"\b(o1cn|jpg|jpeg|png|webp|gif)\b", value):
|
||||
return True
|
||||
if value.count("_") >= 3 and not re.search(r"[\u4e00-\u9fa5]{2,}", value):
|
||||
return True
|
||||
alnum = re.sub(r"[^0-9a-z_]+", "", value)
|
||||
if alnum and len(alnum) >= 16 and not re.search(r"[\u4e00-\u9fa5]", value):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _pick_clean_title_part(raw: str) -> str:
    """Reduce *raw* to a short, title-worthy string, or "" if none remains.

    The input is first normalized with ``_safe_name`` (empty fallback) and
    rejected outright if the result is empty or flagged by
    ``_looks_like_bad_title``.  Otherwise it is split on ``_`` and only
    segments of at least two characters that are not themselves flagged are
    kept; up to the first three kept segments are re-joined with ``_``.

    Args:
        raw: Unprocessed title candidate.

    Returns:
        The cleaned candidate truncated to 30 characters, or an empty string
        when nothing usable is left.
    """
    candidate = _safe_name(raw, "")
    if not candidate or _looks_like_bad_title(candidate):
        return ""

    kept = []
    for segment in candidate.split("_"):
        if len(segment) >= 2 and not _looks_like_bad_title(segment):
            kept.append(segment)

    if kept:
        candidate = "_".join(kept[:3])
        # The re-joined text can trip whole-string rules that the
        # individual segments did not, so validate it again.
        if _looks_like_bad_title(candidate):
            return ""

    return candidate[:30]
|
||||
|
||||
|
||||
def _suffix_from_url(url: str) -> str:
|
||||
path = urlparse(str(url or "")).path
|
||||
suffix = Path(path).suffix.lower()
|
||||
@@ -82,11 +111,11 @@ def _build_processing_prompt(intent: str, requirement_text: str, analysis: Dict)
|
||||
|
||||
def _build_upload_title(intent: str, analysis: Dict, requirement_text: str, idx: int) -> str:
|
||||
analysis = analysis or {}
|
||||
suggested = _safe_name(str(analysis.get("title_suggest") or ""), "")
|
||||
suggested = _pick_clean_title_part(str(analysis.get("title_suggest") or ""))
|
||||
if suggested:
|
||||
return suggested
|
||||
subject = _safe_name(str(analysis.get("subject") or ""), "")
|
||||
proc_type = _safe_name(str(analysis.get("proc_type") or ""), "")
|
||||
subject = _pick_clean_title_part(str(analysis.get("subject") or ""))
|
||||
proc_type = _pick_clean_title_part(str(analysis.get("proc_type") or ""))
|
||||
|
||||
parts = [part for part in (subject, proc_type) if part]
|
||||
if parts:
|
||||
|
||||
Reference in New Issue
Block a user