TRip: change file naming, use pigz for faster tarball creation

This commit is contained in:
2025-08-29 10:23:06 -04:00
parent a11748775e
commit a57173b90a

View File

@@ -4,6 +4,7 @@ import random
import os import os
import tarfile import tarfile
import uuid import uuid
import subprocess
import shutil import shutil
import re import re
from pathlib import Path from pathlib import Path
@@ -72,15 +73,30 @@ def sanitize_filename(name: str) -> str:
# Reasonable length cap # Reasonable length cap
return name[:180] or "Unknown" return name[:180] or "Unknown"
def ensure_unique_path(p: Path) -> Path:
    """Return a path based on *p* that does not currently exist.

    If *p* itself is free it is returned unchanged. Otherwise a counter is
    appended -- ``name (2)``, ``name (3)``, ... -- until an unused path is
    found. For files the counter goes before the extension
    (``song.flac`` -> ``song (2).flac``); for existing directories it goes
    after the full name, so a directory called ``Vol.1`` becomes
    ``Vol.1 (2)`` rather than being truncated at the dot.

    The check is a plain ``exists()`` probe, so another process can still
    claim the returned path before the caller uses it.
    """
    parent = p.parent
    if p.is_dir():
        # A directory name has no extension: a dot in it is part of the name.
        base, ext = p.name, ""
    else:
        base, ext = p.stem, p.suffix

    candidate = parent / f"{base}{ext}"
    counter = 2
    while candidate.exists():
        candidate = parent / f"{base} ({counter}){ext}"
        counter += 1
    return candidate
# ---------- Job ---------- # ---------- Job ----------
def bulk_download(track_list: list, quality: str = "FLAC"): def bulk_download(track_list: list, quality: str = "FLAC"):
@@ -168,10 +184,13 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
all_artists.add(artist) all_artists.add(artist)
# 5) Final path # 5) Final path
final_dir = ROOT_DIR / artist / album artist_dir = ROOT_DIR / artist
final_dir.mkdir(parents=True, exist_ok=True) album_dir = artist_dir / album
final_file = ensure_unique_path(final_dir / f"{title}{ext}") album_dir.mkdir(parents=True, exist_ok=True)
# Only ensure uniqueness at the file level
final_file = ensure_unique_path(album_dir / f"{title}{ext}")
tmp_file.rename(final_file) tmp_file.rename(final_file)
tmp_file = None # consumed tmp_file = None # consumed
@@ -265,17 +284,34 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
# Run blocking tar creation in background thread # Run blocking tar creation in background thread
def _create_tar_sync():
    """Pack ``all_final_files`` into ``staged_tarball``, then delete the originals.

    The fast path shells out to ``tar`` using ``pigz -9`` as the compressor,
    storing members relative to ``ROOT_DIR``. If that cannot be used, the
    archive is rebuilt with Python's ``tarfile`` (gzip). The fallback is
    taken when:

    * ``tar`` is not installed (``FileNotFoundError``);
    * ``tar`` exits non-zero (``CalledProcessError``) -- this is what happens
      when ``pigz`` is missing, and also when the file list is empty;
    * a file is not located under ``ROOT_DIR`` (``ValueError``); the fallback
      stores such a file under its bare file name.

    Source files are removed (best effort) only after the archive has been
    written without raising, so a failed archive never costs the originals.
    """
    try:
        members = [str(f.relative_to(ROOT_DIR)) for f in all_final_files]
        subprocess.run(
            [
                "tar", "-I", "pigz -9", "-cf", str(staged_tarball),
                "-C", str(ROOT_DIR),
                "--",  # end of options: names starting with "-" stay file names
            ] + members,
            check=True,
        )
    except (FileNotFoundError, ValueError, subprocess.CalledProcessError) as exc:
        logging.warning(
            "tar/pigz unavailable or failed (%s), falling back to tarfile (slower).",
            exc,
        )
        # "w:gz" truncates anything a failed tar run may have left behind.
        with tarfile.open(staged_tarball, "w:gz") as tar:
            for f in all_final_files:
                try:
                    arcname = f.relative_to(ROOT_DIR)
                except ValueError:
                    arcname = f.name
                tar.add(f, arcname=str(arcname))

    # Cleanup is best effort: a file that cannot be removed is left in place.
    for f in all_final_files:
        try:
            os.remove(f)
        except OSError:
            pass
await asyncio.to_thread(_create_tar_sync) await asyncio.to_thread(_create_tar_sync)