1
0

压缩图片 (compress images)

This commit is contained in:
2026-04-14 16:33:01 +08:00
parent b62abe219f
commit 380b1623d6
1016 changed files with 62445 additions and 24818 deletions

1
scripts/__init__.py Normal file
View File

@@ -0,0 +1 @@

Binary file not shown.

Binary file not shown.

427
scripts/optimize_assets.py Normal file
View File

@@ -0,0 +1,427 @@
#!/usr/bin/env python3
from __future__ import annotations
import argparse
import re
import shutil
import subprocess
from collections import Counter
from dataclasses import dataclass
from pathlib import Path
# Project layout anchors — the script lives in scripts/, so parent.parent is the repo root.
ROOT = Path(__file__).resolve().parent.parent
ASSETS_DIR = ROOT / "assets"
IMAGES_DIR = ASSETS_DIR / "images"
BACKUP_DIR = ROOT / "assets_backup"
# Text file types scanned for image references.
TEXT_EXTENSIONS = {".html", ".css", ".js"}
# Raster formats eligible for in-place optimization and webp conversion.
SOURCE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".gif"}
MANAGED_IMAGE_EXTENSIONS = SOURCE_EXTENSIONS | {".webp"}
# Images above MAX_FILE_SIZE bytes or MAX_DIMENSION pixels on the longest side
# get extra, more aggressive compression profiles (and a shrinking resize).
MAX_FILE_SIZE = 300_000
MAX_DIMENSION = 2560
# Acceptance rule for webp candidates: files below SMALL_FILE_LIMIT must save
# at least SMALL_MIN_SAVINGS bytes; larger files must save LARGE_MIN_RATIO
# of their original size.
SMALL_FILE_LIMIT = 150 * 1024
SMALL_MIN_SAVINGS = 8 * 1024
LARGE_MIN_RATIO = 0.05
@dataclass(frozen=True)
class ImageInfo:
    """Immutable metadata for one raster image, as reported by `magick identify`."""
    # Pixel dimensions (%w / %h format specifiers).
    width: int
    height: int
    # Number of unique colors (%k); used to decide lossless-vs-lossy webp for PNGs.
    colors: int
def run(cmd: list[str], capture: bool = False) -> str:
    """Execute *cmd*, raising CalledProcessError on failure.

    Returns stripped stdout when *capture* is true, otherwise the empty string.
    """
    completed = subprocess.run(cmd, check=True, text=True, capture_output=capture)
    if capture:
        return completed.stdout.strip()
    return ""
def should_keep_conversion(source_size: int, candidate_size: int) -> bool:
    """Return True when the converted file saves enough bytes to be worth keeping."""
    saved = source_size - candidate_size
    if saved <= 0:
        return False
    # Small files need an absolute saving; larger files a relative one.
    if source_size < SMALL_FILE_LIMIT:
        return saved >= SMALL_MIN_SAVINGS
    return (saved / source_size) >= LARGE_MIN_RATIO
def classify_large_image(source_size: int, width: int, height: int) -> bool:
    """True when the image is oversized by byte count or by pixel dimensions."""
    too_many_bytes = source_size > MAX_FILE_SIZE
    too_many_pixels = max(width, height) > MAX_DIMENSION
    return too_many_bytes or too_many_pixels
def image_info(path: Path) -> ImageInfo:
    """Query ImageMagick (ping mode, no full decode) for width, height, colors."""
    fields = run(
        ["magick", "identify", "-ping", "-format", "%w %h %k", str(path)],
        capture=True,
    ).split()
    width, height, colors = (int(field) for field in fields)
    return ImageInfo(width, height, colors)
def text_files() -> list[Path]:
    """All html/css/js files under ROOT, skipping backups, git data, docs and tests."""
    excluded_dirs = {"assets_backup", ".git", "docs", "tests"}
    found: list[Path] = []
    for path in ROOT.rglob("*"):
        if not path.is_file():
            continue
        if path.suffix.lower() not in TEXT_EXTENSIONS:
            continue
        if excluded_dirs & set(path.parts):
            continue
        found.append(path)
    return sorted(found)
def source_images() -> list[Path]:
    """Raster originals under assets/images, ignoring macOS '._' resource forks."""
    def wanted(path: Path) -> bool:
        return (
            path.is_file()
            and path.suffix.lower() in SOURCE_EXTENSIONS
            and not path.name.startswith("._")
        )
    return sorted(filter(wanted, IMAGES_DIR.rglob("*")))
def colliding_stems(paths: list[Path]) -> set[str]:
    """ROOT-relative stems shared by more than one source image (e.g. a.png + a.jpg)."""
    tally: Counter[str] = Counter()
    for path in paths:
        tally[path.with_suffix("").relative_to(ROOT).as_posix()] += 1
    return {stem for stem, seen in tally.items() if seen > 1}
def build_webp_path(source: Path, duplicate_stems: set[str]) -> Path:
    """Return the .webp destination path for *source*.

    Sources whose stem collides with another extension (e.g. ``a.png`` and
    ``a.jpg``) keep their full name (``a.png.webp``) so the conversions do
    not overwrite each other; otherwise the extension is simply swapped.
    """
    # Key must match colliding_stems(), which uses ROOT-relative posix stems.
    # (The original code computed stem_key once and then unconditionally
    # recomputed it — that dead first assignment is removed here.)
    if source.is_absolute():
        stem_key = source.with_suffix("").relative_to(ROOT).as_posix()
    else:
        stem_key = source.with_suffix("").as_posix()
    if stem_key in duplicate_stems:
        return source.with_name(f"{source.name}.webp")
    return source.with_suffix(".webp")
def managed_webps() -> list[Path]:
    """Every generated .webp under assets/images (resource forks excluded)."""
    hits = [
        candidate
        for candidate in IMAGES_DIR.rglob("*.webp")
        if candidate.is_file() and not candidate.name.startswith("._")
    ]
    return sorted(hits)
def render_candidate(source: Path, operations: list[str], suffix: str) -> Path:
    """Render *source* through *operations* into a sibling file tagged with *suffix*,
    keeping the original format. Returns the candidate path."""
    output = source.with_name(f"{source.stem}.{suffix}{source.suffix}")
    run(["magick", str(source)] + operations + [str(output)])
    return output
def render_webp_candidate(source: Path, operations: list[str], suffix: str) -> Path:
    """Render *source* through *operations* into a sibling .webp file tagged with
    *suffix*. Returns the candidate path."""
    output = source.with_name(f"{source.stem}.{suffix}.webp")
    run(["magick", str(source)] + operations + [str(output)])
    return output
def choose_smaller_original(source: Path, info: ImageInfo) -> tuple[int, int]:
    """Re-encode *source* in place using format-specific profiles.

    Renders one or more candidate encodings beside the original, keeps
    whichever file is smallest (possibly the untouched original), and
    returns ``(original_size, best_size)`` in bytes. Temporary candidate
    files are always deleted, even when ImageMagick raises.
    """
    original_size = source.stat().st_size
    extension = source.suffix.lower()
    candidates: list[Path] = []
    try:
        if extension in {".jpg", ".jpeg"}:
            # Baseline profile tried for every JPEG.
            jpeg_profiles = [
                ["-strip", "-sampling-factor", "4:2:0", "-quality", "86", "-interlace", "Plane"],
            ]
            if classify_large_image(original_size, info.width, info.height):
                # Oversized files additionally try lower quality settings.
                jpeg_profiles.extend(
                    [
                        ["-strip", "-sampling-factor", "4:2:0", "-quality", "82", "-interlace", "Plane"],
                        ["-strip", "-sampling-factor", "4:2:0", "-quality", "78", "-interlace", "Plane"],
                    ]
                )
            if max(info.width, info.height) > MAX_DIMENSION:
                # The trailing ">" in the resize geometry shrinks only, never enlarges.
                jpeg_profiles.extend(
                    [
                        [
                            "-strip",
                            "-resize",
                            f"{MAX_DIMENSION}x{MAX_DIMENSION}>",
                            "-sampling-factor",
                            "4:2:0",
                            "-quality",
                            "84",
                            "-interlace",
                            "Plane",
                        ],
                        [
                            "-strip",
                            "-resize",
                            f"{MAX_DIMENSION}x{MAX_DIMENSION}>",
                            "-sampling-factor",
                            "4:2:0",
                            "-quality",
                            "80",
                            "-interlace",
                            "Plane",
                        ],
                    ]
                )
            for index, profile in enumerate(jpeg_profiles):
                candidates.append(render_candidate(source, profile, f"jpg-opt-{index}"))
        elif extension == ".png":
            # PNG stays lossless: only metadata stripping and recompression.
            candidates.append(
                render_candidate(
                    source,
                    ["-strip", "-define", "png:compression-level=9", "-define", "png:compression-filter=5"],
                    "png-opt",
                )
            )
        elif extension == ".gif":
            # GIF: metadata strip only (frames/palette untouched).
            candidates.append(render_candidate(source, ["-strip"], "gif-opt"))
        # The untouched original competes too, so this can never grow the file.
        best = min([source, *candidates], key=lambda candidate: candidate.stat().st_size)
        best_size = best.stat().st_size
        if best != source:
            best.replace(source)
        return original_size, best_size
    finally:
        # Remove every temporary candidate, including a winner already moved over source.
        for candidate in candidates:
            if candidate.exists():
                candidate.unlink()
def choose_webp(source: Path, info: ImageInfo, target_path: Path) -> tuple[bool, int]:
    """Render webp candidates for *source* and keep the smallest if worthwhile.

    Returns ``(accepted, best_size)``. When the smallest candidate passes
    should_keep_conversion() it is moved to *target_path*; otherwise any
    stale file already at *target_path* is removed. Temporary candidates
    are always cleaned up, even when ImageMagick raises.
    """
    source_size = source.stat().st_size
    extension = source.suffix.lower()
    candidates: list[Path] = []
    try:
        if extension in {".jpg", ".jpeg"}:
            profiles = [
                ["-strip", "-quality", "84", "-define", "webp:method=6"],
                ["-strip", "-quality", "80", "-define", "webp:method=6"],
            ]
            if classify_large_image(source_size, info.width, info.height):
                profiles.append(["-strip", "-quality", "76", "-define", "webp:method=6"])
            if max(info.width, info.height) > MAX_DIMENSION:
                # The trailing ">" in the resize geometry shrinks only, never enlarges.
                profiles.extend(
                    [
                        ["-strip", "-resize", f"{MAX_DIMENSION}x{MAX_DIMENSION}>", "-quality", "82", "-define", "webp:method=6"],
                        ["-strip", "-resize", f"{MAX_DIMENSION}x{MAX_DIMENSION}>", "-quality", "78", "-define", "webp:method=6"],
                    ]
                )
        elif extension == ".png":
            # Small palettes compress best losslessly; otherwise use lossy webp
            # with a high alpha quality to preserve transparency edges.
            if info.colors <= 256 and source_size < SMALL_FILE_LIMIT:
                profiles = [
                    ["-strip", "-define", "webp:lossless=true", "-define", "webp:method=6"],
                ]
            else:
                profiles = [
                    ["-strip", "-quality", "92", "-define", "webp:alpha-quality=95", "-define", "webp:method=6"],
                    ["-strip", "-quality", "88", "-define", "webp:alpha-quality=90", "-define", "webp:method=6"],
                ]
            if max(info.width, info.height) > MAX_DIMENSION:
                profiles.extend(
                    [
                        [
                            "-strip",
                            "-resize",
                            f"{MAX_DIMENSION}x{MAX_DIMENSION}>",
                            "-quality",
                            "90",
                            "-define",
                            "webp:alpha-quality=92",
                            "-define",
                            "webp:method=6",
                        ],
                        [
                            "-strip",
                            "-resize",
                            f"{MAX_DIMENSION}x{MAX_DIMENSION}>",
                            "-quality",
                            "86",
                            "-define",
                            "webp:alpha-quality=88",
                            "-define",
                            "webp:method=6",
                        ],
                    ]
                )
        else:
            # Remaining formats (gif): lossless webp only.
            profiles = [["-strip", "-define", "webp:lossless=true", "-define", "webp:method=6"]]
        for index, profile in enumerate(profiles):
            candidates.append(render_webp_candidate(source, profile, f"webp-opt-{index}"))
        best = min(candidates, key=lambda candidate: candidate.stat().st_size)
        best_size = best.stat().st_size
        if should_keep_conversion(source_size, best_size):
            best.replace(target_path)
            accepted = True
        else:
            # Conversion rejected: drop any stale webp from a previous run.
            if target_path.exists():
                target_path.unlink()
            accepted = False
        return accepted, best_size
    finally:
        # Remove every temporary candidate (the accepted one was already moved away).
        for candidate in candidates:
            if candidate.exists():
                candidate.unlink()
def build_replacements(accepted_sources: list[Path], duplicate_stems: set[str]) -> dict[str, str]:
    """Map every textual spelling of an original path to its .webp counterpart."""
    mapping: dict[str, str] = {}
    for source in accepted_sources:
        old_ref = source.relative_to(ROOT).as_posix()
        new_ref = build_webp_path(source, duplicate_stems).relative_to(ROOT).as_posix()
        mapping[old_ref] = new_ref
        if old_ref.startswith("assets/images/"):
            # Pages also reference images as "../images/..." and "images/...".
            relative_old = old_ref.replace("assets/images/", "../images/", 1)
            relative_new = new_ref.replace("assets/images/", "../images/", 1)
            mapping[relative_old] = relative_new
            mapping[old_ref.replace("assets/", "", 1)] = new_ref.replace("assets/", "", 1)
    return mapping
def referenced_asset_paths() -> set[str]:
    """Collect ROOT-relative paths of every image file referenced from text files.

    References are resolved both against the project root ("assets/images/...")
    and against the referencing file's directory ("../images/...").
    """
    references: set[str] = set()
    # BUG FIX: the dots in "../images" are now escaped ("\.\./images"). The
    # previous pattern let "." match any character, so e.g. "oo/images/x.png"
    # inside "foo/images/x.png" also matched, producing spurious references.
    pattern = re.compile(
        r"(assets/images|\.\./images|images)/[^\"')\s>]+\.(?:png|jpg|jpeg|gif|webp|svg|ico)",
        re.IGNORECASE,
    )
    for path in text_files():
        content = path.read_text(encoding="utf-8")
        for match in pattern.finditer(content):
            ref = match.group(0)
            # Try root-relative first, then page-relative; keep the first hit.
            for candidate in (ROOT / ref, path.parent / ref):
                if candidate.exists() and candidate.is_file():
                    references.add(candidate.resolve().relative_to(ROOT).as_posix())
                    break
    return references
def should_prune_original(source: Path, referenced_paths: set[str], accepted_webp_paths: set[str]) -> bool:
    """True when *source* is unreferenced and an accepted .webp has replaced it."""
    def as_key(path: Path) -> str:
        # Keys are ROOT-relative posix strings, matching referenced_asset_paths().
        return (path.relative_to(ROOT) if path.is_absolute() else path).as_posix()

    if as_key(source) in referenced_paths:
        return False
    # Either naming scheme from build_webp_path() counts as a replacement.
    replacement_keys = {
        as_key(source.with_suffix(".webp")),
        as_key(source.with_name(f"{source.name}.webp")),
    }
    return bool(replacement_keys & accepted_webp_paths)
def update_references(accepted_sources: list[Path], duplicate_stems: set[str]) -> int:
    """Rewrite image references in text files; return how many files changed."""
    replacements = build_replacements(accepted_sources, duplicate_stems)
    if not replacements:
        return 0
    # Longest alternatives first, so "assets/images/x.png" beats "images/x.png".
    alternatives = sorted((re.escape(key) for key in replacements), key=len, reverse=True)
    pattern = re.compile("|".join(alternatives))
    changed = 0
    for path in text_files():
        before = path.read_text(encoding="utf-8")
        after = pattern.sub(lambda match: replacements[match.group(0)], before)
        if after != before:
            path.write_text(after, encoding="utf-8")
            changed += 1
    return changed
def prune_replaced_originals(images: list[Path], duplicate_stems: set[str]) -> int:
    """Delete originals already superseded by an existing accepted .webp."""
    references = referenced_asset_paths()
    accepted: set[str] = set()
    for source in images:
        webp = build_webp_path(source, duplicate_stems)
        if webp.exists():
            accepted.add(webp.relative_to(ROOT).as_posix())
    removed = 0
    for source in images:
        if should_prune_original(source, references, accepted):
            source.unlink()
            removed += 1
    return removed
def check_rewritten_webps() -> int:
    """Verify every .webp reference in text files resolves to an existing file.

    Prints each dangling reference and returns 1 when any is missing;
    otherwise prints a success message and returns 0 (process exit codes).
    """
    missing: list[str] = []
    # BUG FIX: dots in "../images" are now escaped ("\.\./images"); a bare
    # "../images" let "." match any character and flag spurious references.
    pattern = re.compile(r"(assets/images|\.\./images|images)/[^\"')\s>]+\.webp", re.IGNORECASE)
    for path in text_files():
        content = path.read_text(encoding="utf-8")
        for match in pattern.finditer(content):
            ref = match.group(0)
            # A reference is fine if it resolves from ROOT or from the page's directory.
            candidates = [
                ROOT / ref,
                path.parent / ref,
            ]
            if not any(candidate.exists() and candidate.is_file() for candidate in candidates):
                missing.append(f"{path.relative_to(ROOT)}: {ref}")
    if missing:
        print("\n".join(missing))
        return 1
    print("No missing rewritten webp references.")
    return 0
def ensure_backup_copy() -> None:
    """Replace assets_backup/ with a fresh full copy of assets/."""
    if BACKUP_DIR.exists():
        shutil.rmtree(BACKUP_DIR)
    BACKUP_DIR.mkdir(parents=True, exist_ok=True)
    for entry in ASSETS_DIR.iterdir():
        target = BACKUP_DIR / entry.name
        # Directories need a recursive copy; files keep their metadata.
        copier = shutil.copytree if entry.is_dir() else shutil.copy2
        copier(entry, target)
def optimize_assets() -> int:
    """Run the full pipeline: back up assets, shrink originals in place,
    convert to webp, rewrite text references, and print a summary.

    Returns 0 (process exit code). Raises SystemExit if ImageMagick is missing.
    """
    if shutil.which("magick") is None:
        raise SystemExit("ImageMagick `magick` 未安装,无法继续处理图片。")
    ensure_backup_copy()
    images = source_images()
    duplicate_stems = colliding_stems(images)
    total_before = 0
    total_after_original = 0
    accepted_sources: list[Path] = []
    accepted_count = 0
    skipped_count = 0
    for source in images:
        info = image_info(source)
        before_size, after_original_size = choose_smaller_original(source, info)
        total_before += before_size
        total_after_original += after_original_size
        # image_info() is deliberately queried again: choose_smaller_original
        # may have re-encoded/resized the file, invalidating the earlier info.
        accepted, _ = choose_webp(source, image_info(source), build_webp_path(source, duplicate_stems))
        if accepted:
            accepted_sources.append(source)
            accepted_count += 1
        else:
            skipped_count += 1
    changed_files = update_references(accepted_sources, duplicate_stems)
    print(f"Backup refreshed at: {BACKUP_DIR.relative_to(ROOT)}")
    print(f"Raster size before optimization: {total_before / 1024 / 1024:.2f} MB")
    print(f"Raster size after original optimization: {total_after_original / 1024 / 1024:.2f} MB")
    print(f"Accepted webp conversions: {accepted_count}")
    print(f"Rejected webp conversions: {skipped_count}")
    print(f"Updated text files: {changed_files}")
    return 0
def main() -> int:
    """CLI entry point.

    ``--check`` validates rewritten webp references, ``--prune-replaced``
    deletes superseded originals; with no flags the full pipeline runs.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--check", action="store_true", help="validate rewritten webp references")
    parser.add_argument("--prune-replaced", action="store_true", help="delete originals already replaced by accepted webp files")
    arguments = parser.parse_args()
    if arguments.check:
        return check_rewritten_webps()
    if arguments.prune_replaced:
        all_images = source_images()
        stems = colliding_stems(all_images)
        pruned = prune_replaced_originals(all_images, stems)
        print(f"Pruned replaced originals: {pruned}")
        return 0
    return optimize_assets()
if __name__ == "__main__":
    # Propagate main()'s return value as the process exit status.
    raise SystemExit(main())