#!/usr/bin/env python3 from __future__ import annotations import argparse import re import shutil import subprocess from collections import Counter from dataclasses import dataclass from pathlib import Path ROOT = Path(__file__).resolve().parent.parent ASSETS_DIR = ROOT / "assets" IMAGES_DIR = ASSETS_DIR / "images" BACKUP_DIR = ROOT / "assets_backup" TEXT_EXTENSIONS = {".html", ".css", ".js"} SOURCE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".gif"} MANAGED_IMAGE_EXTENSIONS = SOURCE_EXTENSIONS | {".webp"} MAX_FILE_SIZE = 300_000 MAX_DIMENSION = 2560 SMALL_FILE_LIMIT = 150 * 1024 SMALL_MIN_SAVINGS = 8 * 1024 LARGE_MIN_RATIO = 0.05 @dataclass(frozen=True) class ImageInfo: width: int height: int colors: int def run(cmd: list[str], capture: bool = False) -> str: result = subprocess.run( cmd, check=True, text=True, capture_output=capture, ) return result.stdout.strip() if capture else "" def should_keep_conversion(source_size: int, candidate_size: int) -> bool: savings = source_size - candidate_size if savings <= 0: return False if source_size < SMALL_FILE_LIMIT: return savings >= SMALL_MIN_SAVINGS return (savings / source_size) >= LARGE_MIN_RATIO def classify_large_image(source_size: int, width: int, height: int) -> bool: return source_size > MAX_FILE_SIZE or max(width, height) > MAX_DIMENSION def image_info(path: Path) -> ImageInfo: output = run( ["magick", "identify", "-ping", "-format", "%w %h %k", str(path)], capture=True, ) width_text, height_text, colors_text = output.split() return ImageInfo(int(width_text), int(height_text), int(colors_text)) def text_files() -> list[Path]: return sorted( path for path in ROOT.rglob("*") if path.is_file() and path.suffix.lower() in TEXT_EXTENSIONS and "assets_backup" not in path.parts and ".git" not in path.parts and "docs" not in path.parts and "tests" not in path.parts ) def source_images() -> list[Path]: return sorted( path for path in IMAGES_DIR.rglob("*") if path.is_file() and path.suffix.lower() in SOURCE_EXTENSIONS and not 
path.name.startswith("._") ) def colliding_stems(paths: list[Path]) -> set[str]: counts = Counter(path.with_suffix("").relative_to(ROOT).as_posix() for path in paths) return {stem for stem, count in counts.items() if count > 1} def build_webp_path(source: Path, duplicate_stems: set[str]) -> Path: stem_key = source.with_suffix("").as_posix() if not source.is_absolute(): stem_key = source.with_suffix("").as_posix() else: stem_key = source.with_suffix("").relative_to(ROOT).as_posix() if stem_key in duplicate_stems: return source.with_name(f"{source.name}.webp") return source.with_suffix(".webp") def managed_webps() -> list[Path]: return sorted( path for path in IMAGES_DIR.rglob("*.webp") if path.is_file() and not path.name.startswith("._") ) def render_candidate(source: Path, operations: list[str], suffix: str) -> Path: target = source.with_name(f"{source.stem}.{suffix}{source.suffix}") run(["magick", str(source), *operations, str(target)]) return target def render_webp_candidate(source: Path, operations: list[str], suffix: str) -> Path: target = source.with_name(f"{source.stem}.{suffix}.webp") run(["magick", str(source), *operations, str(target)]) return target def choose_smaller_original(source: Path, info: ImageInfo) -> tuple[int, int]: original_size = source.stat().st_size extension = source.suffix.lower() candidates: list[Path] = [] try: if extension in {".jpg", ".jpeg"}: jpeg_profiles = [ ["-strip", "-sampling-factor", "4:2:0", "-quality", "86", "-interlace", "Plane"], ] if classify_large_image(original_size, info.width, info.height): jpeg_profiles.extend( [ ["-strip", "-sampling-factor", "4:2:0", "-quality", "82", "-interlace", "Plane"], ["-strip", "-sampling-factor", "4:2:0", "-quality", "78", "-interlace", "Plane"], ] ) if max(info.width, info.height) > MAX_DIMENSION: jpeg_profiles.extend( [ [ "-strip", "-resize", f"{MAX_DIMENSION}x{MAX_DIMENSION}>", "-sampling-factor", "4:2:0", "-quality", "84", "-interlace", "Plane", ], [ "-strip", "-resize", 
f"{MAX_DIMENSION}x{MAX_DIMENSION}>", "-sampling-factor", "4:2:0", "-quality", "80", "-interlace", "Plane", ], ] ) for index, profile in enumerate(jpeg_profiles): candidates.append(render_candidate(source, profile, f"jpg-opt-{index}")) elif extension == ".png": candidates.append( render_candidate( source, ["-strip", "-define", "png:compression-level=9", "-define", "png:compression-filter=5"], "png-opt", ) ) elif extension == ".gif": candidates.append(render_candidate(source, ["-strip"], "gif-opt")) best = min([source, *candidates], key=lambda candidate: candidate.stat().st_size) best_size = best.stat().st_size if best != source: best.replace(source) return original_size, best_size finally: for candidate in candidates: if candidate.exists(): candidate.unlink() def choose_webp(source: Path, info: ImageInfo, target_path: Path) -> tuple[bool, int]: source_size = source.stat().st_size extension = source.suffix.lower() candidates: list[Path] = [] try: if extension in {".jpg", ".jpeg"}: profiles = [ ["-strip", "-quality", "84", "-define", "webp:method=6"], ["-strip", "-quality", "80", "-define", "webp:method=6"], ] if classify_large_image(source_size, info.width, info.height): profiles.append(["-strip", "-quality", "76", "-define", "webp:method=6"]) if max(info.width, info.height) > MAX_DIMENSION: profiles.extend( [ ["-strip", "-resize", f"{MAX_DIMENSION}x{MAX_DIMENSION}>", "-quality", "82", "-define", "webp:method=6"], ["-strip", "-resize", f"{MAX_DIMENSION}x{MAX_DIMENSION}>", "-quality", "78", "-define", "webp:method=6"], ] ) elif extension == ".png": if info.colors <= 256 and source_size < SMALL_FILE_LIMIT: profiles = [ ["-strip", "-define", "webp:lossless=true", "-define", "webp:method=6"], ] else: profiles = [ ["-strip", "-quality", "92", "-define", "webp:alpha-quality=95", "-define", "webp:method=6"], ["-strip", "-quality", "88", "-define", "webp:alpha-quality=90", "-define", "webp:method=6"], ] if max(info.width, info.height) > MAX_DIMENSION: profiles.extend( [ [ 
"-strip", "-resize", f"{MAX_DIMENSION}x{MAX_DIMENSION}>", "-quality", "90", "-define", "webp:alpha-quality=92", "-define", "webp:method=6", ], [ "-strip", "-resize", f"{MAX_DIMENSION}x{MAX_DIMENSION}>", "-quality", "86", "-define", "webp:alpha-quality=88", "-define", "webp:method=6", ], ] ) else: profiles = [["-strip", "-define", "webp:lossless=true", "-define", "webp:method=6"]] for index, profile in enumerate(profiles): candidates.append(render_webp_candidate(source, profile, f"webp-opt-{index}")) best = min(candidates, key=lambda candidate: candidate.stat().st_size) best_size = best.stat().st_size if should_keep_conversion(source_size, best_size): best.replace(target_path) accepted = True else: if target_path.exists(): target_path.unlink() accepted = False return accepted, best_size finally: for candidate in candidates: if candidate.exists(): candidate.unlink() def build_replacements(accepted_sources: list[Path], duplicate_stems: set[str]) -> dict[str, str]: replacements: dict[str, str] = {} for source in accepted_sources: relative = source.relative_to(ROOT).as_posix() target = build_webp_path(source, duplicate_stems).relative_to(ROOT).as_posix() replacements[relative] = target if relative.startswith("assets/images/"): replacements[relative.replace("assets/images/", "../images/", 1)] = target.replace( "assets/images/", "../images/", 1 ) replacements[relative.replace("assets/", "", 1)] = target.replace("assets/", "", 1) return replacements def referenced_asset_paths() -> set[str]: references: set[str] = set() pattern = re.compile(r"(assets/images|../images|images)/[^\"')\s>]+\.(?:png|jpg|jpeg|gif|webp|svg|ico)", re.IGNORECASE) for path in text_files(): content = path.read_text(encoding="utf-8") for match in pattern.finditer(content): ref = match.group(0) candidates = [ROOT / ref, path.parent / ref] for candidate in candidates: if candidate.exists() and candidate.is_file(): references.add(candidate.resolve().relative_to(ROOT).as_posix()) break return references 
def should_prune_original(source: Path, referenced_paths: set[str], accepted_webp_paths: set[str]) -> bool:
    """Return True when *source* is unreferenced and has an accepted WebP twin.

    Absolute paths are keyed relative to ROOT; relative paths are used as-is.
    """

    def key_of(path: Path) -> str:
        # Normalize to the same POSIX key scheme used by the reference scanner.
        return path.relative_to(ROOT).as_posix() if path.is_absolute() else path.as_posix()

    if key_of(source) in referenced_paths:
        return False
    # Either naming scheme (a.webp for unique stems, a.png.webp for colliding
    # stems) counts as the accepted replacement.
    webp_keys = {
        key_of(source.with_suffix(".webp")),
        key_of(source.with_name(f"{source.name}.webp")),
    }
    return any(webp_key in accepted_webp_paths for webp_key in webp_keys)


def update_references(accepted_sources: list[Path], duplicate_stems: set[str]) -> int:
    """Rewrite image references in text files to their accepted WebP paths.

    Returns the number of files changed. Longer paths are matched first so a
    path that is a prefix of another cannot shadow it.
    """
    replacements = build_replacements(accepted_sources, duplicate_stems)
    if not replacements:
        return 0
    pattern = re.compile(
        "|".join(sorted((re.escape(item) for item in replacements), key=len, reverse=True))
    )
    changed = 0
    for path in text_files():
        original = path.read_text(encoding="utf-8")
        updated = pattern.sub(lambda match: replacements[match.group(0)], original)
        if updated != original:
            path.write_text(updated, encoding="utf-8")
            changed += 1
    return changed


def prune_replaced_originals(images: list[Path], duplicate_stems: set[str]) -> int:
    """Delete originals that are unreferenced and already replaced by a WebP.

    Returns the number of files deleted.
    """
    references = referenced_asset_paths()
    accepted_webp_paths: set[str] = set()
    for source in images:
        # Hoisted: the original computed build_webp_path() twice per source.
        webp_path = build_webp_path(source, duplicate_stems)
        if webp_path.exists():
            accepted_webp_paths.add(webp_path.relative_to(ROOT).as_posix())
    pruned = 0
    for source in images:
        if should_prune_original(source, references, accepted_webp_paths):
            source.unlink()
            pruned += 1
    return pruned


def check_rewritten_webps() -> int:
    """Report WebP references in text files that resolve to no existing file.

    Returns a process exit code: 0 when every reference resolves, 1 otherwise.
    """
    missing: list[str] = []
    # Bug fix: "../images" previously used unescaped dots, so ".." matched ANY
    # two characters and produced bogus "missing" reports for strings like
    # "fo/images/x.webp" carved out of longer paths.
    pattern = re.compile(r"(assets/images|\.\./images|images)/[^\"')\s>]+\.webp", re.IGNORECASE)
    for path in text_files():
        content = path.read_text(encoding="utf-8")
        for match in pattern.finditer(content):
            ref = match.group(0)
            candidates = [ROOT / ref, path.parent / ref]
            if not any(candidate.exists() and candidate.is_file() for candidate in candidates):
                missing.append(f"{path.relative_to(ROOT)}: {ref}")
    if missing:
        print("\n".join(missing))
        return 1
    print("No missing rewritten webp references.")
    return 0


def ensure_backup_copy() -> None:
    """Refresh BACKUP_DIR with a full copy of ASSETS_DIR before mutating assets."""
    if BACKUP_DIR.exists():
        shutil.rmtree(BACKUP_DIR)
    BACKUP_DIR.mkdir(parents=True, exist_ok=True)
    for child in ASSETS_DIR.iterdir():
        destination = BACKUP_DIR / child.name
        if child.is_dir():
            shutil.copytree(child, destination)
        else:
            shutil.copy2(child, destination)


def optimize_assets() -> int:
    """Back up assets, shrink originals, generate WebPs and rewrite references.

    Returns a process exit code (always 0 on success); raises SystemExit when
    ImageMagick is not installed.
    """
    if shutil.which("magick") is None:
        raise SystemExit("ImageMagick `magick` 未安装,无法继续处理图片。")
    ensure_backup_copy()
    images = source_images()
    duplicate_stems = colliding_stems(images)
    total_before = 0
    total_after_original = 0
    accepted_sources: list[Path] = []
    accepted_count = 0
    skipped_count = 0
    for source in images:
        info = image_info(source)
        before_size, after_original_size = choose_smaller_original(source, info)
        total_before += before_size
        total_after_original += after_original_size
        # Re-identify the file: choose_smaller_original may have replaced it
        # with a resized candidate, making the earlier info stale.
        accepted, _ = choose_webp(source, image_info(source), build_webp_path(source, duplicate_stems))
        if accepted:
            accepted_sources.append(source)
            accepted_count += 1
        else:
            skipped_count += 1
    changed_files = update_references(accepted_sources, duplicate_stems)
    print(f"Backup refreshed at: {BACKUP_DIR.relative_to(ROOT)}")
    print(f"Raster size before optimization: {total_before / 1024 / 1024:.2f} MB")
    print(f"Raster size after original optimization: {total_after_original / 1024 / 1024:.2f} MB")
    print(f"Accepted webp conversions: {accepted_count}")
    print(f"Rejected webp conversions: {skipped_count}")
    print(f"Updated text files: {changed_files}")
    return 0


def main() -> int:
    """CLI entry point: --check validates references, --prune-replaced deletes
    replaced originals, default mode runs the full optimization pass."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--check", action="store_true", help="validate rewritten webp references")
    parser.add_argument(
        "--prune-replaced",
        action="store_true",
        help="delete originals already replaced by accepted webp files",
    )
    args = parser.parse_args()
    if args.check:
        return check_rewritten_webps()
    if args.prune_replaced:
        images = source_images()
        duplicate_stems = colliding_stems(images)
        pruned = prune_replaced_originals(images, duplicate_stems)
        print(f"Pruned replaced originals: {pruned}")
        return 0
    return optimize_assets()


if __name__ == "__main__":
    raise SystemExit(main())