import logging import asyncio import random import os import tarfile import traceback import uuid import subprocess import shutil import re from pathlib import Path from urllib.parse import urlparse, unquote import aiohttp from datetime import datetime from mediafile import MediaFile # type: ignore[import] from rq import get_current_job from utils.sr_wrapper import SRUtil # ---------- Config ---------- ROOT_DIR = Path("/storage/music2") MAX_RETRIES = 5 THROTTLE_MIN = 1.7 THROTTLE_MAX = 10.0 HEADERS = { "User-Agent": ( "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " "AppleWebKit/537.36 (KHTML, like Gecko) " "Chrome/116.0.5845.97 Safari/537.36" ), "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8", "Accept-Language": "en-US,en;q=0.9", "Connection": "keep-alive", } logging.basicConfig( level=logging.DEBUG, format="%(asctime)s [%(levelname)s] %(name)s: %(message)s", ) sr = SRUtil() # ---------- Helpers ---------- def tag_with_mediafile(file_path: str, meta: dict): f = MediaFile(file_path) # --- Helper to safely set textual/number fields --- def safe_set(attr, value, default=None, cast=None): if value is None: value = default if value is not None: if cast is not None: setattr(f, attr, cast(value)) else: setattr(f, attr, str(value)) # --- Basic textual metadata --- safe_set("title", meta.get("title"), default="Unknown Title") safe_set("artist", meta.get("artist"), default="Unknown Artist") safe_set("albumartist", meta.get("album_artist"), default="Unknown Artist") safe_set("album", meta.get("album"), default="Unknown Album") safe_set("track", meta.get("track_number"), default=0, cast=int) safe_set("disc", meta.get("disc_number"), default=0, cast=int) safe_set("isrc", meta.get("isrc"), default="") safe_set("bpm", meta.get("bpm"), default=0, cast=int) # --- Release date --- release_date_str = meta.get("release_date") release_date_obj = None if release_date_str: try: release_date_obj = datetime.fromisoformat(release_date_str).date() except ValueError: try: # fallback if only year string release_date_obj = datetime(int(release_date_str[:4]), 1, 1).date() except Exception: pass if release_date_obj: f.date = release_date_obj # --- Save all tags --- f.save() def cleanup_empty_dirs(root: Path): """ Recursively remove any directories under root that contain no files (empty or only empty subdirectories). """ for dirpath, dirnames, filenames in os.walk(root, topdown=False): p = Path(dirpath) has_file = any(f.is_file() for f in p.rglob("*")) if not has_file: try: p.rmdir() except Exception: pass def sanitize_filename(name: str) -> str: """Make a string safe for file/dir names.""" if not name: return "Unknown" name = name.replace("/", "-").replace("\\", "-") name = re.sub(r'[<>:"|?*\x00-\x1F]', "", name) name = name.strip().strip(".") name = re.sub(r"\s+", " ", name) return name[:180] or "Unknown" def ensure_unique_path(p: Path) -> Path: """ Ensure the given file or directory path is unique *within its parent folder*. Only appends (2), (3)... if a real conflict exists in that folder. """ parent = p.parent stem, suffix = p.stem, p.suffix existing = {f.name for f in parent.glob(f"*{suffix}") if f.is_file()} candidate = f"{stem}{suffix}" if candidate not in existing: return parent / candidate counter = 2 while True: candidate = f"{stem} ({counter}){suffix}" if candidate not in existing: return parent / candidate counter += 1 # ---------- Job ---------- def bulk_download(track_list: list, quality: str = "FLAC"): """ RQ job: - fetches stream URLs - downloads with retries + throttling - uses SR metadata to name/organize files - creates ONE tarball for all tracks - returns [tarball_path] """ job = get_current_job() job_id = job.id if job else uuid.uuid4().hex staging_root = ROOT_DIR / job_id if job: try: job.meta["track_ids"] = [str(t) for t in (track_list or [])] job.meta["tracks"] = [] job.meta["progress"] = 0 job.meta["tarball"] = None job.meta["status"] = "Started" job.save_meta() except Exception as e: logging.warning("Failed to init job.meta: %s", e) async def process_tracks(): per_track_meta = [] all_final_files = [] all_artists = set() (ROOT_DIR / "completed").mkdir(parents=True, exist_ok=True) async with aiohttp.ClientSession(headers=HEADERS) as session: total = len(track_list or []) logging.critical("Total tracks to process: %s", total) if job: job.meta["progress"] = 0 job.save_meta() for i, track_id in enumerate(track_list or []): track_info = { "track_id": str(track_id), "status": "Pending", "file_path": None, "error": None, "attempts": 0, } attempt = 0 while attempt < MAX_RETRIES: tmp_file = None attempt += 1 track_info["attempts"] = attempt try: url = await sr.get_stream_url_by_track_id(track_id, quality) if not url: raise RuntimeError("No stream URL") parsed = urlparse(url) clean_path = unquote(parsed.path) ext = Path(clean_path).suffix or ".mp3" tmp_file = Path(f"/tmp/{uuid.uuid4().hex}{ext}") async with session.get(url) as resp: resp.raise_for_status() with open(tmp_file, "wb") as f: async for chunk in resp.content.iter_chunked(64 * 1024): f.write(chunk) md = await sr.get_metadata_by_track_id(track_id) or {} logging.info("Metadata for %s: %s", track_id, md) artist_raw = md.get("artist") or "Unknown Artist" album_raw = md.get("album") or "Unknown Album" title_raw = md.get("title") or f"Track {track_id}" artist = sanitize_filename(artist_raw) album = sanitize_filename(album_raw) title = sanitize_filename(title_raw) all_artists.add(artist) artist_dir = staging_root / artist album_dir = artist_dir / album album_dir.mkdir(parents=True, exist_ok=True) final_file = ensure_unique_path(album_dir / f"{title}{ext}") tag_with_mediafile(str(tmp_file), md) tmp_file.rename(final_file) tmp_file = None track_info["status"] = "Success" track_info["file_path"] = str(final_file) track_info["error"] = None all_final_files.append(final_file) if job: job.meta["progress"] = int(((i + 1) / total) * 100) job.save_meta() break except aiohttp.ClientResponseError as e: if e.status == 429: wait_time = min(60, 2**attempt) # exponential up to 60s logging.warning( "Rate limited (429). Sleeping %s seconds", wait_time ) await asyncio.sleep(wait_time) else: await asyncio.sleep( random.uniform(THROTTLE_MIN, THROTTLE_MAX) ) except Exception as e: logging.error( "Track %s attempt %s failed: %s", track_id, attempt, e ) traceback.print_exc() track_info["error"] = str(e) if attempt >= MAX_RETRIES: track_info["status"] = "Failed" await asyncio.sleep(random.uniform(THROTTLE_MIN, THROTTLE_MAX)) finally: try: await session.close() if tmp_file and tmp_file.exists(): tmp_file.unlink() except Exception: pass per_track_meta.append(track_info) if job: try: job.meta["tracks"] = per_track_meta job.save_meta() except Exception as e: logging.warning( "Failed to update job.meta after track %s: %s", track_id, e ) await asyncio.sleep(random.uniform(THROTTLE_MIN, THROTTLE_MAX)) if not all_final_files: if job: try: job.meta["tarball"] = None job.meta["status"] = "Failed" job.save_meta() except Exception: pass return [] artist_counts: dict[str, int] = {} for t in per_track_meta: if t["status"] == "Success" and t.get("file_path"): try: artist = Path(t["file_path"]).relative_to(staging_root).parts[0] except Exception: artist = "Unknown Artist" artist_counts[artist] = artist_counts.get(artist, 0) + 1 if artist_counts: top_artist = sorted(artist_counts.items(), key=lambda kv: (-kv[1], kv[0]))[ 0 ][0] else: top_artist = "Unknown Artist" combined_artist = sanitize_filename(top_artist) short_id = uuid.uuid4().hex[:8] staged_tarball = staging_root / f"{combined_artist}_{short_id}.tar.gz" final_tarball = ROOT_DIR / "completed" / quality / staged_tarball.name final_tarball.parent.mkdir(parents=True, exist_ok=True) if job: try: job.meta["status"] = "Compressing" job.save_meta() except Exception: pass logging.info("Creating tarball: %s", staged_tarball) def _create_tar_sync(): try: subprocess.run( [ "tar", "-I", "pigz -9", "-cf", str(staged_tarball), "-C", str(staging_root), ] + [str(f.relative_to(staging_root)) for f in all_final_files], check=True, ) for f in all_final_files: try: os.remove(f) except Exception: pass except FileNotFoundError: logging.warning("pigz not available, falling back to tarfile (slower).") with tarfile.open(staged_tarball, "w:gz") as tar: for f in all_final_files: try: arcname = f.relative_to(staging_root) except ValueError: arcname = f.name tar.add(f, arcname=str(arcname)) try: os.remove(f) except Exception: pass await asyncio.to_thread(_create_tar_sync) if not staged_tarball.exists(): logging.error("Tarball was not created: %s", staged_tarball) if job: try: job.meta["status"] = "compress_failed" job.save_meta() except Exception: pass return [] logging.critical("Tarball created: %s", staged_tarball) try: staged_tarball.rename(final_tarball) except Exception: shutil.move(str(staged_tarball), str(final_tarball)) logging.critical("Tarball finalized: %s", final_tarball) await asyncio.to_thread(shutil.rmtree, staging_root, ignore_errors=True) if job: job.meta["tarball"] = str(final_tarball) job.meta["progress"] = 100 job.meta["status"] = "Completed" job.save_meta() return [str(final_tarball)] loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) try: return loop.run_until_complete(process_tracks()) except Exception as e: if job: job.meta["status"] = "Failed" job.save_meta() logging.critical("Exception: %s", str(e)) finally: loop.close()