"""RQ job module: bulk-download tracks, tag them, and package them into a tarball.

Flow per job:
  1. Resolve a stream URL per track (via SRUtil) and download with retries.
  2. Tag each file from SR metadata and stage it under /storage/music2/<job_id>/.
  3. Compress everything into ONE tarball (pigz if available, tarfile fallback).
  4. Report progress/errors to a Discord webhook and into RQ job.meta.
"""

import asyncio
import logging
import os
import random
import re
import shutil
import subprocess
import tarfile
import traceback
import uuid
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional
from urllib.parse import unquote, urlparse

import aiohttp
from dotenv import load_dotenv
from mediafile import MediaFile  # type: ignore[import]
from rq import get_current_job

from utils.sr_wrapper import SRUtil

# ---------- Config ----------
ROOT_DIR = Path("/storage/music2")
MAX_RETRIES = 5
THROTTLE_MIN = 1.0
THROTTLE_MAX = 3.5
DISCORD_WEBHOOK = os.getenv("TRIP_WEBHOOK_URI", "").strip()

HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/116.0.5845.97 Safari/537.36"
    ),
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.9",
    "Connection": "keep-alive",
}

logging.basicConfig(
    level=logging.DEBUG,
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
)

load_dotenv()
sr = SRUtil()

# Strong references to fire-and-forget tasks so they are not garbage-collected
# before completing (asyncio only keeps weak references to tasks).
_background_tasks: set = set()


# ---------- Discord helper ----------
async def discord_notify(
    webhook_url: str,
    title: str,
    description: str,
    target: Optional[str] = None,
    color: int = 0x00FF00,
):
    """POST a single embed to a Discord webhook, retrying until it succeeds.

    Args:
        webhook_url: Full Discord webhook URL. If empty/unset, the call is a
            no-op — otherwise the permanent retry loop below would spin
            forever against an invalid URL.
        title: Embed title.
        description: Embed body; truncated to Discord's limit (~2000 chars).
        target: Optional "Target" field shown inline in the embed.
        color: Embed accent color (default green).
    """
    if not webhook_url:
        return

    embed = {
        "title": title,
        "description": description[:1900] if description else "",
        "color": color,
        "timestamp": datetime.now(timezone.utc).isoformat(),
    }
    if target:
        embed["fields"] = [{"name": "Target", "value": str(target), "inline": True}]
    payload = {
        "embeds": [embed],
    }

    while True:  # permanent retry — notifications are considered mandatory
        try:
            async with aiohttp.ClientSession() as session:
                async with session.post(
                    webhook_url,
                    json=payload,
                    timeout=aiohttp.ClientTimeout(total=10),
                ) as resp:
                    if resp.status >= 400:
                        text = await resp.text()
                        raise RuntimeError(
                            f"Discord webhook failed ({resp.status}): {text}"
                        )
            break
        except Exception as e:
            print(f"Discord send failed, retrying: {e}")
            await asyncio.sleep(5)


def send_log_to_discord(message: str, level: str, target: Optional[str] = None):
    """Send a log message to Discord from either sync or async context.

    Picks an embed color from the level name, then either schedules a task
    (when already inside an event loop) or runs a fresh loop synchronously.
    """
    colors = {"WARNING": 0xFFA500, "ERROR": 0xFF0000, "CRITICAL": 0xFF0000}
    color = colors.get(level.upper(), 0xFFFF00)

    async def _send():
        await discord_notify(
            webhook_url=DISCORD_WEBHOOK,
            title=f"{level} in bulk_download",
            description=message,
            target=target,
            color=color,
        )

    try:
        asyncio.get_running_loop()
        # Already in an event loop — schedule a task; keep a strong
        # reference so the task cannot be garbage-collected mid-flight.
        task = asyncio.create_task(_send())
        _background_tasks.add(task)
        task.add_done_callback(_background_tasks.discard)
    except RuntimeError:
        # Not in an event loop — safe to run synchronously.
        asyncio.run(_send())


# ---------- Helpers ----------
def tag_with_mediafile(file_path: str, meta: dict):
    """Write tags from an SR metadata dict onto an audio file via MediaFile.

    Missing values fall back to explicit defaults so files are never left
    untagged. Release dates accept full ISO dates or bare years.
    """
    f = MediaFile(file_path)

    def safe_set(attr, value, default=None, cast=None):
        # Apply default when value is absent; cast numeric fields, stringify
        # the rest. Skips the attribute entirely when both value and default
        # are None.
        if value is None:
            value = default
        if value is not None:
            if cast:
                setattr(f, attr, cast(value))
            else:
                setattr(f, attr, str(value))

    safe_set("title", meta.get("title"), default="Unknown Title")
    safe_set("artist", meta.get("artist"), default="Unknown Artist")
    safe_set("albumartist", meta.get("album_artist"), default="Unknown Artist")
    safe_set("album", meta.get("album"), default="Unknown Album")
    safe_set("track", meta.get("track_number"), default=0, cast=int)
    safe_set("disc", meta.get("disc_number"), default=0, cast=int)
    safe_set("isrc", meta.get("isrc"), default="")
    safe_set("bpm", meta.get("bpm"), default=0, cast=int)

    release_date_str = meta.get("release_date")
    release_date_obj = None
    if release_date_str:
        try:
            release_date_obj = datetime.fromisoformat(release_date_str).date()
        except ValueError:
            # Not a full ISO date — fall back to treating the leading four
            # characters as a year (e.g. "1999" or "1999-??").
            try:
                release_date_obj = datetime(int(release_date_str[:4]), 1, 1).date()
            except Exception:
                pass
    if release_date_obj:
        f.date = release_date_obj

    f.save()


def sanitize_filename(name: str) -> str:
    """Return *name* made safe for use as a single path component.

    Replaces path separators, strips characters invalid on common
    filesystems, collapses whitespace, and caps length at 180 chars.
    Empty/None input yields "Unknown".
    """
    if not name:
        return "Unknown"
    name = name.replace("/", "-").replace("\\", "-")
    name = re.sub(r'[<>:"|?*\x00-\x1F]', "", name)
    name = name.strip().strip(".")
    name = re.sub(r"\s+", " ", name)
    return name[:180] or "Unknown"


def ensure_unique_path(p: Path) -> Path:
    """Return *p*, or a "name (2).ext"-style variant that does not collide
    with any existing file in the same directory."""
    parent = p.parent
    stem, suffix = p.stem, p.suffix
    existing = {f.name for f in parent.glob(f"*{suffix}") if f.is_file()}

    candidate = f"{stem}{suffix}"
    if candidate not in existing:
        return parent / candidate

    counter = 2
    while True:
        candidate = f"{stem} ({counter}){suffix}"
        if candidate not in existing:
            return parent / candidate
        counter += 1


# ---------- bulk_download ----------
def bulk_download(track_list: list, quality: str = "FLAC"):
    """
    RQ job:
      - fetches stream URLs
      - downloads with retries + throttling
      - uses SR metadata to name/organize files
      - creates ONE tarball for all tracks
      - returns [tarball_path] (empty list when nothing succeeded)
      - sends relevant messages to Discord
    """
    job = get_current_job()
    job_id = job.id if job else uuid.uuid4().hex
    target = job.meta.get("target") if job else None
    staging_root = ROOT_DIR / job_id

    if job:
        try:
            job.meta["track_ids"] = [str(t) for t in (track_list or [])]
            job.meta["tracks"] = []
            job.meta["progress"] = 0
            job.meta["tarball"] = None
            job.meta["status"] = "Started"
            job.save_meta()
        except Exception as e:
            send_log_to_discord(f"Failed to init job.meta: {e}", "WARNING", target)

    # Job started Discord message (guard len() against a None track_list,
    # consistent with the `track_list or []` guards below).
    asyncio.run(discord_notify(
        DISCORD_WEBHOOK,
        title=f"Job Started: {job_id}",
        description=f"Processing `{len(track_list or [])}` track(s)",
        target=target,
        color=0x00FFFF,
    ))

    async def process_tracks():
        per_track_meta = []
        all_final_files = []
        (ROOT_DIR / "completed").mkdir(parents=True, exist_ok=True)

        async with aiohttp.ClientSession(headers=HEADERS) as session:
            total = len(track_list or [])
            for i, track_id in enumerate(track_list or []):
                track_info = {
                    "track_id": str(track_id),
                    "status": "Pending",
                    "file_path": None,
                    "error": None,
                    "attempts": 0,
                }
                attempt = 0
                while attempt < MAX_RETRIES:
                    tmp_file = None
                    attempt += 1
                    track_info["attempts"] = attempt
                    try:
                        url = await sr.get_stream_url_by_track_id(track_id, quality)
                        if not url:
                            raise RuntimeError("No stream URL")

                        # Derive the audio extension from the (decoded) URL path.
                        parsed = urlparse(url)
                        clean_path = unquote(parsed.path)
                        ext = Path(clean_path).suffix or ".mp3"
                        tmp_file = Path(f"/tmp/{uuid.uuid4().hex}{ext}")

                        async with session.get(url) as resp:
                            resp.raise_for_status()
                            with open(tmp_file, "wb") as f:
                                async for chunk in resp.content.iter_chunked(64 * 1024):
                                    f.write(chunk)

                        md = await sr.get_metadata_by_track_id(track_id) or {}
                        artist_raw = md.get("artist") or "Unknown Artist"
                        album_raw = md.get("album") or "Unknown Album"
                        title_raw = md.get("title") or f"Track {track_id}"
                        artist = sanitize_filename(artist_raw)
                        album = sanitize_filename(album_raw)
                        title = sanitize_filename(title_raw)

                        album_dir = staging_root / artist / album
                        album_dir.mkdir(parents=True, exist_ok=True)
                        final_file = ensure_unique_path(album_dir / f"{title}{ext}")

                        tag_with_mediafile(str(tmp_file), md)
                        tmp_file.rename(final_file)
                        tmp_file = None  # renamed away — nothing to clean up

                        track_info["status"] = "Success"
                        track_info["file_path"] = str(final_file)
                        track_info["error"] = None
                        all_final_files.append(final_file)

                        if job:
                            job.meta["progress"] = int(((i + 1) / total) * 100)
                            job.meta["tracks"] = per_track_meta + [track_info]
                            job.save_meta()
                        break
                    except aiohttp.ClientResponseError as e:
                        msg = f"Track {track_id} attempt {attempt} ClientResponseError: {e}"
                        send_log_to_discord(msg, "WARNING", target)
                        track_info["error"] = str(e)
                        if attempt >= MAX_RETRIES:
                            # Previously this path left the track "Pending"
                            # forever — record the terminal failure.
                            track_info["status"] = "Failed"
                            send_log_to_discord(
                                f"Track {track_id} failed after {attempt} attempts",
                                "ERROR",
                                target,
                            )
                        if e.status == 429:
                            # Rate limited — exponential backoff capped at 60s.
                            wait_time = min(60, 2**attempt)
                            await asyncio.sleep(wait_time)
                        else:
                            await asyncio.sleep(random.uniform(THROTTLE_MIN, THROTTLE_MAX))
                    except Exception as e:
                        tb = traceback.format_exc()
                        msg = f"Track {track_id} attempt {attempt} failed: {e}\n{tb}"
                        send_log_to_discord(msg, "ERROR", target)
                        track_info["error"] = str(e)
                        if attempt >= MAX_RETRIES:
                            track_info["status"] = "Failed"
                            send_log_to_discord(
                                f"Track {track_id} failed after {attempt} attempts",
                                "ERROR",
                                target,
                            )
                        await asyncio.sleep(random.uniform(THROTTLE_MIN, THROTTLE_MAX))
                    finally:
                        # Best-effort cleanup of a partially-downloaded temp file.
                        try:
                            if tmp_file and tmp_file.exists():
                                os.remove(tmp_file)
                        except Exception:
                            pass
                per_track_meta.append(track_info)

        if not all_final_files:
            if job:
                job.meta["tarball"] = None
                job.meta["status"] = "Failed"
                job.save_meta()
            send_log_to_discord(
                f"No tracks were successfully downloaded for job `{job_id}`",
                "CRITICAL",
                target,
            )
            return []

        # Tarball creation — name it after the most frequent artist.
        artist_counts = {}
        for t in per_track_meta:
            if t["status"] == "Success" and t.get("file_path"):
                try:
                    artist = Path(t["file_path"]).relative_to(staging_root).parts[0]
                except Exception:
                    artist = "Unknown Artist"
                artist_counts[artist] = artist_counts.get(artist, 0) + 1
        top_artist = (
            sorted(artist_counts.items(), key=lambda kv: (-kv[1], kv[0]))[0][0]
            if artist_counts
            else "Unknown Artist"
        )
        combined_artist = sanitize_filename(top_artist)

        staged_tarball = staging_root / f"{combined_artist}.tar.gz"
        counter = 1
        # NOTE: Path("X.tar.gz").stem is "X.tar", which would produce
        # "X.tar (2).tar.gz" — use the artist name as the base instead.
        base_name = combined_artist
        while staged_tarball.exists():
            counter += 1
            staged_tarball = staging_root / f"{base_name} ({counter}).tar.gz"

        final_tarball = ROOT_DIR / "completed" / quality / staged_tarball.name
        final_tarball.parent.mkdir(parents=True, exist_ok=True)

        if job:
            job.meta["status"] = "Compressing"
            job.save_meta()
        logging.info("Creating tarball: %s", staged_tarball)
        await discord_notify(
            DISCORD_WEBHOOK,
            title=f"Compressing: Job {job_id}",
            description=(
                f"Creating tarball (`{len(track_list or [])}` track(s)).\n"
                f"Staging path: {staged_tarball}"
            ),
            color=0xFFA500,
            target=target,
        )

        try:
            # Prefer parallel gzip via GNU tar's -I; paths are passed relative
            # to the staging root so the archive has no absolute paths.
            subprocess.run(
                ["tar", "-I", "pigz -9", "-cf", str(staged_tarball), "-C", str(staging_root)]
                + [str(f.relative_to(staging_root)) for f in all_final_files],
                check=True,
            )
            for f in all_final_files:
                try:
                    os.remove(f)
                except Exception:
                    pass
        except (FileNotFoundError, subprocess.CalledProcessError):
            # FileNotFoundError: `tar` binary missing. CalledProcessError:
            # tar ran but failed (typically because pigz is not installed).
            # Either way, fall back to the pure-Python tarfile module.
            send_log_to_discord(
                "pigz not available, falling back to tarfile (slower).",
                "WARNING",
                target,
            )
            with tarfile.open(staged_tarball, "w:gz") as tar:
                for f in all_final_files:
                    try:
                        arcname = f.relative_to(staging_root)
                    except ValueError:
                        arcname = f.name
                    tar.add(f, arcname=str(arcname))
                    try:
                        os.remove(f)
                    except Exception:
                        pass

        if not staged_tarball.exists():
            send_log_to_discord(
                f"Tarball was not created: `{staged_tarball}`", "CRITICAL", target
            )
            if job:
                job.meta["status"] = "compress_failed"
                job.save_meta()
            return []

        try:
            staged_tarball.rename(final_tarball)
        except Exception:
            # rename() fails across filesystems — fall back to a copying move.
            shutil.move(str(staged_tarball), str(final_tarball))

        await asyncio.to_thread(shutil.rmtree, staging_root, ignore_errors=True)

        if job:
            job.meta["tarball"] = str(final_tarball)
            job.meta["progress"] = 100
            job.meta["status"] = "Completed"
            job.save_meta()

        # Job completed Discord message
        await discord_notify(
            DISCORD_WEBHOOK,
            title=f"Job Completed: {job_id}",
            description=(
                f"Processed `{len(track_list or [])}` track(s). "
                f"Tarball: `{final_tarball}`"
            ),
            target=target,
            color=0x00FF00,
        )
        return [str(final_tarball)]

    # RQ workers run jobs synchronously — drive the async pipeline on a
    # dedicated event loop.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        return loop.run_until_complete(process_tracks())
    except Exception as e:
        send_log_to_discord(
            f"bulk_download failed: {e}\n{traceback.format_exc()}",
            "CRITICAL",
            target,
        )
        if job:
            job.meta["status"] = "Failed"
            job.save_meta()
        return []  # callers expect a list, never None
    finally:
        loop.close()