fix: clean generated tuhui titles
This commit is contained in:
@@ -62,6 +62,35 @@ def _safe_name(text: str, fallback: str = "image") -> str:
|
|||||||
return cleaned[:40] or fallback
|
return cleaned[:40] or fallback
|
||||||
|
|
||||||
|
|
||||||
|
def _looks_like_bad_title(text: str) -> bool:
|
||||||
|
value = str(text or "").strip().lower()
|
||||||
|
if not value:
|
||||||
|
return True
|
||||||
|
if "http" in value or "www" in value or "alicdn" in value or "imgextra" in value:
|
||||||
|
return True
|
||||||
|
if re.search(r"\b(o1cn|jpg|jpeg|png|webp|gif)\b", value):
|
||||||
|
return True
|
||||||
|
if value.count("_") >= 3 and not re.search(r"[\u4e00-\u9fa5]{2,}", value):
|
||||||
|
return True
|
||||||
|
alnum = re.sub(r"[^0-9a-z_]+", "", value)
|
||||||
|
if alnum and len(alnum) >= 16 and not re.search(r"[\u4e00-\u9fa5]", value):
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def _pick_clean_title_part(raw: str) -> str:
    """Reduce *raw* to a short title fragment, or "" if nothing usable remains.

    The text is first passed through ``_safe_name`` (with an empty fallback).
    If the result is empty or flagged by ``_looks_like_bad_title`` the
    function gives up. Otherwise the result is split on "_" and only tokens
    that are at least two characters long and not flagged as bad are kept;
    when any survive, the first three are re-joined with "_". The final
    candidate is checked once more and truncated to 30 characters.

    Args:
        raw: Unprocessed title text.

    Returns:
        The cleaned fragment (at most 30 characters), or "" when rejected.
    """
    candidate = _safe_name(raw, "")
    if not candidate:
        return ""
    if _looks_like_bad_title(candidate):
        return ""

    kept = []
    for token in candidate.split("_"):
        # Empty and single-character tokens carry no meaning.
        if len(token) < 2:
            continue
        if _looks_like_bad_title(token):
            continue
        kept.append(token)

    # When no token survives, the sanitised text is kept as it was.
    if kept:
        candidate = "_".join(kept[:3])

    return "" if _looks_like_bad_title(candidate) else candidate[:30]
|
||||||
|
|
||||||
|
|
||||||
def _suffix_from_url(url: str) -> str:
|
def _suffix_from_url(url: str) -> str:
|
||||||
path = urlparse(str(url or "")).path
|
path = urlparse(str(url or "")).path
|
||||||
suffix = Path(path).suffix.lower()
|
suffix = Path(path).suffix.lower()
|
||||||
@@ -82,11 +111,11 @@ def _build_processing_prompt(intent: str, requirement_text: str, analysis: Dict)
|
|||||||
|
|
||||||
def _build_upload_title(intent: str, analysis: Dict, requirement_text: str, idx: int) -> str:
|
def _build_upload_title(intent: str, analysis: Dict, requirement_text: str, idx: int) -> str:
|
||||||
analysis = analysis or {}
|
analysis = analysis or {}
|
||||||
suggested = _safe_name(str(analysis.get("title_suggest") or ""), "")
|
suggested = _pick_clean_title_part(str(analysis.get("title_suggest") or ""))
|
||||||
if suggested:
|
if suggested:
|
||||||
return suggested
|
return suggested
|
||||||
subject = _safe_name(str(analysis.get("subject") or ""), "")
|
subject = _pick_clean_title_part(str(analysis.get("subject") or ""))
|
||||||
proc_type = _safe_name(str(analysis.get("proc_type") or ""), "")
|
proc_type = _pick_clean_title_part(str(analysis.get("proc_type") or ""))
|
||||||
|
|
||||||
parts = [part for part in (subject, proc_type) if part]
|
parts = [part for part in (subject, proc_type) if part]
|
||||||
if parts:
|
if parts:
|
||||||
|
|||||||
Reference in New Issue
Block a user