428 lines
16 KiB
Python
428 lines
16 KiB
Python
#!/usr/bin/env python3
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import re
|
|
import shutil
|
|
import subprocess
|
|
from collections import Counter
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
|
|
|
|
# Repository root: this script lives one directory below it.
ROOT = Path(__file__).resolve().parent.parent
ASSETS_DIR = ROOT / "assets"
IMAGES_DIR = ASSETS_DIR / "images"
# Full snapshot of assets/ refreshed before any destructive optimization.
BACKUP_DIR = ROOT / "assets_backup"
# File types scanned (and rewritten) when updating image references.
TEXT_EXTENSIONS = {".html", ".css", ".js"}
# Raster formats eligible for optimization and webp conversion.
SOURCE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".gif"}
MANAGED_IMAGE_EXTENSIONS = SOURCE_EXTENSIONS | {".webp"}
# An image counts as "large" above this byte size or pixel dimension.
MAX_FILE_SIZE = 300_000
MAX_DIMENSION = 2560
# Acceptance thresholds for keeping a conversion: files under
# SMALL_FILE_LIMIT must save an absolute byte amount; larger files must
# save at least a fraction of their size (see should_keep_conversion).
SMALL_FILE_LIMIT = 150 * 1024
SMALL_MIN_SAVINGS = 8 * 1024
LARGE_MIN_RATIO = 0.05
|
|
|
|
|
|
@dataclass(frozen=True)
class ImageInfo:
    """Immutable snapshot of an image's geometry and palette size.

    Populated from `magick identify` output by ``image_info``.
    """

    width: int  # pixel width (%w)
    height: int  # pixel height (%h)
    colors: int  # number of unique colors (%k)
|
|
|
|
|
|
def run(cmd: list[str], capture: bool = False) -> str:
    """Execute *cmd*, raising CalledProcessError on a non-zero exit status.

    Returns the stripped stdout when *capture* is true, otherwise "".
    """
    completed = subprocess.run(cmd, check=True, text=True, capture_output=capture)
    if not capture:
        return ""
    return completed.stdout.strip()
|
|
|
|
|
|
def should_keep_conversion(source_size: int, candidate_size: int) -> bool:
    """Decide whether a re-encoded file saves enough bytes to be worth keeping.

    Sources under SMALL_FILE_LIMIT must shed an absolute byte amount
    (SMALL_MIN_SAVINGS); larger sources must shed at least LARGE_MIN_RATIO
    of their original size.
    """
    saved = source_size - candidate_size
    if saved <= 0:
        return False
    is_small = source_size < SMALL_FILE_LIMIT
    return saved >= SMALL_MIN_SAVINGS if is_small else saved / source_size >= LARGE_MIN_RATIO
|
|
|
|
|
|
def classify_large_image(source_size: int, width: int, height: int) -> bool:
    """Return True when the image exceeds the byte-size or pixel-dimension budget."""
    if source_size > MAX_FILE_SIZE:
        return True
    return max(width, height) > MAX_DIMENSION
|
|
|
|
|
|
def image_info(path: Path) -> ImageInfo:
    """Probe *path* with `magick identify` and return its dimensions and color count."""
    fields = run(
        ["magick", "identify", "-ping", "-format", "%w %h %k", str(path)],
        capture=True,
    ).split()
    width, height, colors = (int(field) for field in fields)
    return ImageInfo(width, height, colors)
|
|
|
|
|
|
def text_files() -> list[Path]:
    """Collect the site's html/css/js files, skipping backups, git, docs and tests."""
    excluded_dirs = ("assets_backup", ".git", "docs", "tests")
    found = [
        candidate
        for candidate in ROOT.rglob("*")
        if candidate.is_file()
        and candidate.suffix.lower() in TEXT_EXTENSIONS
        and not any(name in candidate.parts for name in excluded_dirs)
    ]
    return sorted(found)
|
|
|
|
|
|
def source_images() -> list[Path]:
    """List raster originals (png/jpg/jpeg/gif) under assets/images.

    AppleDouble sidecar files ("._*") are ignored.
    """
    found: list[Path] = []
    for candidate in IMAGES_DIR.rglob("*"):
        if not candidate.is_file():
            continue
        if candidate.name.startswith("._"):
            continue
        if candidate.suffix.lower() in SOURCE_EXTENSIONS:
            found.append(candidate)
    return sorted(found)
|
|
|
|
|
|
def colliding_stems(paths: list[Path]) -> set[str]:
    """Return ROOT-relative stems shared by more than one source file.

    E.g. `a.png` and `a.jpg` in the same directory collide on stem "a".
    """
    tally: Counter[str] = Counter()
    for path in paths:
        tally[path.with_suffix("").relative_to(ROOT).as_posix()] += 1
    return {stem for stem, occurrences in tally.items() if occurrences > 1}
|
|
|
|
|
|
def build_webp_path(source: Path, duplicate_stems: set[str]) -> Path:
    """Return the webp target path for *source*.

    When the source's stem collides with another source file (e.g. `a.png`
    and `a.jpg`), the full original name is kept and ".webp" appended so
    both targets stay distinct; otherwise the extension is simply swapped.

    Fix: the original assigned ``stem_key`` unconditionally before the
    if/else that overwrote it in both branches — the dead assignment is
    removed.
    """
    if source.is_absolute():
        stem_key = source.with_suffix("").relative_to(ROOT).as_posix()
    else:
        stem_key = source.with_suffix("").as_posix()
    if stem_key in duplicate_stems:
        return source.with_name(f"{source.name}.webp")
    return source.with_suffix(".webp")
|
|
|
|
|
|
def managed_webps() -> list[Path]:
    """List existing webp files under assets/images, skipping AppleDouble files."""
    matches = (
        candidate
        for candidate in IMAGES_DIR.rglob("*.webp")
        if candidate.is_file() and not candidate.name.startswith("._")
    )
    return sorted(matches)
|
|
|
|
|
|
def render_candidate(source: Path, operations: list[str], suffix: str) -> Path:
    """Render *source* through ImageMagick *operations* into a sibling temp file.

    The temp file keeps the source's extension and carries *suffix* in its stem.
    """
    output = source.with_name(f"{source.stem}.{suffix}{source.suffix}")
    run(["magick", str(source), *operations, str(output)])
    return output
|
|
|
|
|
|
def render_webp_candidate(source: Path, operations: list[str], suffix: str) -> Path:
    """Render *source* through ImageMagick *operations* into a sibling .webp temp file."""
    output = source.with_name(f"{source.stem}.{suffix}.webp")
    run(["magick", str(source), *operations, str(output)])
    return output
|
|
|
|
|
|
def choose_smaller_original(source: Path, info: ImageInfo) -> tuple[int, int]:
    """Re-encode *source* in place and keep only the smallest result.

    Renders one or more candidate encodings next to the source (profile set
    depends on the format and on whether the image is "large"), replaces the
    source with the winning candidate only when it is strictly smaller, and
    always deletes leftover temp files.

    Returns ``(original_size_bytes, best_size_bytes)``.
    """
    original_size = source.stat().st_size
    extension = source.suffix.lower()
    candidates: list[Path] = []
    try:
        if extension in {".jpg", ".jpeg"}:
            # Baseline re-encode; more aggressive profiles are added below
            # only for images over the size/dimension budget.
            jpeg_profiles = [
                ["-strip", "-sampling-factor", "4:2:0", "-quality", "86", "-interlace", "Plane"],
            ]
            if classify_large_image(original_size, info.width, info.height):
                jpeg_profiles.extend(
                    [
                        ["-strip", "-sampling-factor", "4:2:0", "-quality", "82", "-interlace", "Plane"],
                        ["-strip", "-sampling-factor", "4:2:0", "-quality", "78", "-interlace", "Plane"],
                    ]
                )
            if max(info.width, info.height) > MAX_DIMENSION:
                # The trailing ">" in the resize geometry means "only shrink,
                # never enlarge".
                jpeg_profiles.extend(
                    [
                        [
                            "-strip",
                            "-resize",
                            f"{MAX_DIMENSION}x{MAX_DIMENSION}>",
                            "-sampling-factor",
                            "4:2:0",
                            "-quality",
                            "84",
                            "-interlace",
                            "Plane",
                        ],
                        [
                            "-strip",
                            "-resize",
                            f"{MAX_DIMENSION}x{MAX_DIMENSION}>",
                            "-sampling-factor",
                            "4:2:0",
                            "-quality",
                            "80",
                            "-interlace",
                            "Plane",
                        ],
                    ]
                )
            for index, profile in enumerate(jpeg_profiles):
                candidates.append(render_candidate(source, profile, f"jpg-opt-{index}"))
        elif extension == ".png":
            # PNG: lossless recompression only (no quality loss).
            candidates.append(
                render_candidate(
                    source,
                    ["-strip", "-define", "png:compression-level=9", "-define", "png:compression-filter=5"],
                    "png-opt",
                )
            )
        elif extension == ".gif":
            # GIF: just strip metadata.
            candidates.append(render_candidate(source, ["-strip"], "gif-opt"))

        # The source itself competes; if no candidate beats it, nothing changes.
        best = min([source, *candidates], key=lambda candidate: candidate.stat().st_size)
        best_size = best.stat().st_size
        if best != source:
            # Path.replace moves the winner over the source, so the cleanup
            # in the finally block sees that candidate as already gone.
            best.replace(source)
        return original_size, best_size
    finally:
        # Delete every remaining temp candidate, even on error.
        for candidate in candidates:
            if candidate.exists():
                candidate.unlink()
|
|
|
|
|
|
def choose_webp(source: Path, info: ImageInfo, target_path: Path) -> tuple[bool, int]:
    """Render webp candidates for *source* and keep the best one at *target_path*.

    Profile selection by source format:
    - JPEG: lossy, with a lower-quality profile for "large" images and
      downscaling profiles when a dimension exceeds MAX_DIMENSION.
    - PNG: lossless when small and paletted (<=256 colors), otherwise lossy
      with tuned alpha quality (plus downscaling profiles when oversized).
    - Anything else (e.g. GIF): a single lossless profile.

    The smallest candidate is accepted only when it clears the savings
    thresholds (should_keep_conversion); on rejection any stale target from
    a previous run is deleted. Temp candidates are always cleaned up.

    Returns ``(accepted, best_candidate_size_bytes)``.
    """
    source_size = source.stat().st_size
    extension = source.suffix.lower()
    candidates: list[Path] = []
    try:
        if extension in {".jpg", ".jpeg"}:
            profiles = [
                ["-strip", "-quality", "84", "-define", "webp:method=6"],
                ["-strip", "-quality", "80", "-define", "webp:method=6"],
            ]
            if classify_large_image(source_size, info.width, info.height):
                profiles.append(["-strip", "-quality", "76", "-define", "webp:method=6"])
            if max(info.width, info.height) > MAX_DIMENSION:
                # ">" resize geometry: shrink only, never enlarge.
                profiles.extend(
                    [
                        ["-strip", "-resize", f"{MAX_DIMENSION}x{MAX_DIMENSION}>", "-quality", "82", "-define", "webp:method=6"],
                        ["-strip", "-resize", f"{MAX_DIMENSION}x{MAX_DIMENSION}>", "-quality", "78", "-define", "webp:method=6"],
                    ]
                )
        elif extension == ".png":
            if info.colors <= 256 and source_size < SMALL_FILE_LIMIT:
                # Small paletted PNGs compress well losslessly.
                profiles = [
                    ["-strip", "-define", "webp:lossless=true", "-define", "webp:method=6"],
                ]
            else:
                profiles = [
                    ["-strip", "-quality", "92", "-define", "webp:alpha-quality=95", "-define", "webp:method=6"],
                    ["-strip", "-quality", "88", "-define", "webp:alpha-quality=90", "-define", "webp:method=6"],
                ]
                if max(info.width, info.height) > MAX_DIMENSION:
                    profiles.extend(
                        [
                            [
                                "-strip",
                                "-resize",
                                f"{MAX_DIMENSION}x{MAX_DIMENSION}>",
                                "-quality",
                                "90",
                                "-define",
                                "webp:alpha-quality=92",
                                "-define",
                                "webp:method=6",
                            ],
                            [
                                "-strip",
                                "-resize",
                                f"{MAX_DIMENSION}x{MAX_DIMENSION}>",
                                "-quality",
                                "86",
                                "-define",
                                "webp:alpha-quality=88",
                                "-define",
                                "webp:method=6",
                            ],
                        ]
                    )
        else:
            # Fallback for any other managed format: single lossless pass.
            profiles = [["-strip", "-define", "webp:lossless=true", "-define", "webp:method=6"]]

        for index, profile in enumerate(profiles):
            candidates.append(render_webp_candidate(source, profile, f"webp-opt-{index}"))

        best = min(candidates, key=lambda candidate: candidate.stat().st_size)
        best_size = best.stat().st_size
        if should_keep_conversion(source_size, best_size):
            # Path.replace moves the winner into place, so the finally-block
            # cleanup sees that candidate as already gone.
            best.replace(target_path)
            accepted = True
        else:
            # Drop a stale target from an earlier run so rewritten references
            # never point at an outdated conversion.
            if target_path.exists():
                target_path.unlink()
            accepted = False
        return accepted, best_size
    finally:
        # Delete every remaining temp candidate, even on error.
        for candidate in candidates:
            if candidate.exists():
                candidate.unlink()
|
|
|
|
|
|
def build_replacements(accepted_sources: list[Path], duplicate_stems: set[str]) -> dict[str, str]:
    """Map every textual spelling of an original image path to its webp target.

    For each accepted source, three spellings are registered: the
    ROOT-relative path ("assets/images/..."), the stylesheet-relative form
    ("../images/..."), and the assets-stripped form ("images/...").
    """
    replacements: dict[str, str] = {}
    for source in accepted_sources:
        relative = source.relative_to(ROOT).as_posix()
        target = build_webp_path(source, duplicate_stems).relative_to(ROOT).as_posix()
        replacements[relative] = target
        if relative.startswith("assets/images/"):
            replacements[relative.replace("assets/images/", "../images/", 1)] = target.replace(
                "assets/images/", "../images/", 1
            )
            # NOTE(review): indentation reconstructed — this assets-stripped
            # variant is assumed to belong inside the startswith() guard.
            # Harmless either way: managed sources all live under assets/images/.
            replacements[relative.replace("assets/", "", 1)] = target.replace("assets/", "", 1)
    return replacements
|
|
|
|
|
|
def referenced_asset_paths() -> set[str]:
    """Scan text files for image references; return ROOT-relative paths that exist.

    Each matched reference is resolved both against ROOT and against the
    referencing file's directory; the first resolution that names an existing
    file wins.

    Fix: the "../images" alternative previously had unescaped dots, so ".."
    matched ANY two characters (e.g. "my/images/x.png" was mis-captured
    whole instead of via the plain "images/" branch). The dots are now
    escaped so only a literal "../images" matches.
    """
    references: set[str] = set()
    pattern = re.compile(
        r"(assets/images|\.\./images|images)/[^\"')\s>]+\.(?:png|jpg|jpeg|gif|webp|svg|ico)",
        re.IGNORECASE,
    )
    for path in text_files():
        content = path.read_text(encoding="utf-8")
        for match in pattern.finditer(content):
            ref = match.group(0)
            for candidate in (ROOT / ref, path.parent / ref):
                if candidate.exists() and candidate.is_file():
                    references.add(candidate.resolve().relative_to(ROOT).as_posix())
                    break
    return references
|
|
|
|
|
|
def should_prune_original(source: Path, referenced_paths: set[str], accepted_webp_paths: set[str]) -> bool:
    """Return True when *source* is unreferenced and a matching accepted webp exists.

    Both webp naming schemes are considered: suffix swap ("a.webp") and
    full-name append ("a.png.webp", used for colliding stems).
    """

    def as_key(path: Path) -> str:
        # Keys are ROOT-relative POSIX strings; relative inputs are used as-is.
        return path.relative_to(ROOT).as_posix() if path.is_absolute() else path.as_posix()

    if as_key(source) in referenced_paths:
        return False
    possible_webps = {
        as_key(source.with_suffix(".webp")),
        as_key(source.with_name(f"{source.name}.webp")),
    }
    return bool(possible_webps & accepted_webp_paths)
|
|
|
|
|
|
def update_references(accepted_sources: list[Path], duplicate_stems: set[str]) -> int:
    """Rewrite text files to point at accepted webp targets.

    Returns the number of files actually modified.
    """
    replacements = build_replacements(accepted_sources, duplicate_stems)
    if not replacements:
        return 0
    # Longest keys first so overlapping spellings resolve to the full match.
    ordered_keys = sorted((re.escape(key) for key in replacements), key=len, reverse=True)
    pattern = re.compile("|".join(ordered_keys))
    touched = 0
    for text_path in text_files():
        before = text_path.read_text(encoding="utf-8")
        after = pattern.sub(lambda m: replacements[m.group(0)], before)
        if after != before:
            text_path.write_text(after, encoding="utf-8")
            touched += 1
    return touched
|
|
|
|
|
|
def prune_replaced_originals(images: list[Path], duplicate_stems: set[str]) -> int:
    """Delete originals that are unreferenced and already have an accepted webp.

    Returns the number of files deleted.

    Fix: the target path is computed once per source — the original called
    build_webp_path twice per image (once in the comprehension value, once
    in its filter).
    """
    references = referenced_asset_paths()
    accepted_webp_paths: set[str] = set()
    for source in images:
        target = build_webp_path(source, duplicate_stems)
        if target.exists():
            accepted_webp_paths.add(target.relative_to(ROOT).as_posix())
    pruned = 0
    for source in images:
        if should_prune_original(source, references, accepted_webp_paths):
            source.unlink()
            pruned += 1
    return pruned
|
|
|
|
|
|
def check_rewritten_webps() -> int:
    """Verify every webp reference in the text files resolves to an existing file.

    Prints each missing reference and returns 1 on failure, 0 when all
    references resolve.

    Fix: the "../images" alternative previously had unescaped dots, so ".."
    matched any two characters and could swallow path fragments that should
    have matched the plain "images/" branch; the dots are now escaped.
    """
    missing: list[str] = []
    pattern = re.compile(r"(assets/images|\.\./images|images)/[^\"')\s>]+\.webp", re.IGNORECASE)
    for path in text_files():
        content = path.read_text(encoding="utf-8")
        for match in pattern.finditer(content):
            ref = match.group(0)
            # A reference may be relative to ROOT or to the referencing file.
            candidates = [
                ROOT / ref,
                path.parent / ref,
            ]
            if not any(candidate.exists() and candidate.is_file() for candidate in candidates):
                missing.append(f"{path.relative_to(ROOT)}: {ref}")
    if missing:
        print("\n".join(missing))
        return 1
    print("No missing rewritten webp references.")
    return 0
|
|
|
|
|
|
def ensure_backup_copy() -> None:
    """Replace BACKUP_DIR with a fresh copy of everything under ASSETS_DIR."""
    if BACKUP_DIR.exists():
        shutil.rmtree(BACKUP_DIR)
    BACKUP_DIR.mkdir(parents=True, exist_ok=True)
    for entry in ASSETS_DIR.iterdir():
        target = BACKUP_DIR / entry.name
        # Directories are copied recursively; files keep their metadata.
        copier = shutil.copytree if entry.is_dir() else shutil.copy2
        copier(entry, target)
|
|
|
|
|
|
def optimize_assets() -> int:
    """Full optimization pass over the site's raster assets.

    Steps: back up assets/, shrink each original in place, attempt a webp
    conversion per image, rewrite text references to accepted conversions,
    and print a summary. Returns a process exit code (always 0 on success).
    """
    # ImageMagick 7's `magick` binary is required for every conversion step.
    if shutil.which("magick") is None:
        raise SystemExit("ImageMagick `magick` 未安装,无法继续处理图片。")

    # Refresh the safety copy before any in-place modification.
    ensure_backup_copy()

    images = source_images()
    duplicate_stems = colliding_stems(images)

    total_before = 0
    total_after_original = 0
    accepted_sources: list[Path] = []
    accepted_count = 0
    skipped_count = 0

    for source in images:
        info = image_info(source)
        before_size, after_original_size = choose_smaller_original(source, info)
        total_before += before_size
        total_after_original += after_original_size
        # Re-probe the image: choose_smaller_original may have replaced the
        # file with a re-encoded (possibly resized) version.
        accepted, _ = choose_webp(source, image_info(source), build_webp_path(source, duplicate_stems))
        if accepted:
            accepted_sources.append(source)
            accepted_count += 1
        else:
            skipped_count += 1

    changed_files = update_references(accepted_sources, duplicate_stems)
    print(f"Backup refreshed at: {BACKUP_DIR.relative_to(ROOT)}")
    print(f"Raster size before optimization: {total_before / 1024 / 1024:.2f} MB")
    print(f"Raster size after original optimization: {total_after_original / 1024 / 1024:.2f} MB")
    print(f"Accepted webp conversions: {accepted_count}")
    print(f"Rejected webp conversions: {skipped_count}")
    print(f"Updated text files: {changed_files}")
    return 0
|
|
|
|
|
|
def main() -> int:
    """CLI entry point.

    --check validates rewritten webp references, --prune-replaced deletes
    superseded originals; with no flags the full optimization pass runs.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--check", action="store_true", help="validate rewritten webp references")
    parser.add_argument(
        "--prune-replaced",
        action="store_true",
        help="delete originals already replaced by accepted webp files",
    )
    args = parser.parse_args()
    if args.check:
        return check_rewritten_webps()
    if not args.prune_replaced:
        return optimize_assets()
    images = source_images()
    duplicate_stems = colliding_stems(images)
    pruned = prune_replaced_originals(images, duplicate_stems)
    print(f"Pruned replaced originals: {pruned}")
    return 0
|
|
|
|
|
|
if __name__ == "__main__":
    # Propagate main()'s return value as the process exit status.
    raise SystemExit(main())
|