From e1194475b3007cfea15c1f062a255417976682d5 Mon Sep 17 00:00:00 2001
From: codey
Date: Thu, 18 Sep 2025 08:13:21 -0400
Subject: [PATCH] misc

---
 .gitignore                |   1 +
 test/add_cover_art.py     | 341 ++++++++++++++++++++++++++++++++++++++
 test/test_search_track.py |  23 +++
 utils/rip_background.py   | 274 +++++++++++++++---------------
 utils/sr_wrapper.py       | 210 ++++++++++++++++++-----
 5 files changed, 661 insertions(+), 188 deletions(-)
 create mode 100644 test/add_cover_art.py
 create mode 100644 test/test_search_track.py

diff --git a/.gitignore b/.gitignore
index 911bc58..567bf3a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -30,3 +30,4 @@ job_review.py
 check_missing.py
 **/auth/*
 .gitignore
+.env
\ No newline at end of file
diff --git a/test/add_cover_art.py b/test/add_cover_art.py
new file mode 100644
index 0000000..93808e7
--- /dev/null
+++ b/test/add_cover_art.py
@@ -0,0 +1,341 @@
+import os
+import csv
+import re
+import time
+import sys
+import random
+import asyncio
+import logging
+import threading
+import traceback
+import requests
+from music_tag import load_file
+from rich.console import Console
+from rich.table import Table
+from rich.progress import Progress, BarColumn, TextColumn, TimeElapsedColumn, TaskProgressColumn
+sys.path.insert(0, "..")
+from utils.sr_wrapper import SRUtil
+from rapidfuzz import fuzz
+
+
+def strip_album_tags(album):
+    """Remove common parenthetical tags (deluxe, remastered, years, ...) from the end of album names."""
+    pattern = r"\s*\((deluxe|remaster(ed)?|original mix|expanded|bonus|edition|version|mono|stereo|explicit|clean|anniversary|special|reissue|expanded edition|bonus track(s)?|international|digital|single|ep|live|instrumental|karaoke|radio edit|explicit version|clean version|acoustic|demo|re-recorded|remix|mix|edit|feat\.?|featuring|with .+|from .+|soundtrack|ost|score|session|vol(ume)? ?\d+|disc ?\d+|cd ?\d+|lp ?\d+|vinyl|202[0-9]|20[0-1][0-9]|19[0-9][0-9])\)$"
+    return re.sub(pattern, "", album, flags=re.IGNORECASE).strip()
+
+
+def strip_album_suffix(album):
+    """Remove common trailing tags (' EP', ' LP', ' Single', ' Remix', ...) from album names.
+    Only matches at the end of the string, case-insensitive, with or without punctuation."""
+    suffix_pattern = r"[\s\-_:]*(ep|lp|single|album|remix|version|edit|mix|deluxe|expanded|anniversary|reissue|instrumental|karaoke|ost|score|session|mono|stereo|explicit|clean|bonus|disc ?\d+|cd ?\d+|vinyl|202[0-9]|20[0-1][0-9]|19[0-9][0-9])$"
+    return re.sub(suffix_pattern, "", album, flags=re.IGNORECASE).strip()
+
+
+def search_itunes_cover(artist, album):
+    """Fallback: search the public iTunes/Apple Music API for album art."""
+    import urllib.parse
+    base_url = "https://itunes.apple.com/search"
+    params = {
+        "term": f"{artist} {album}",
+        "entity": "album",
+        "limit": 1,
+        "media": "music"
+    }
+    url = f"{base_url}?{urllib.parse.urlencode(params)}"
+    try:
+        resp = requests.get(url, timeout=10)
+        if resp.status_code != 200:
+            return None
+        data = resp.json()
+        if data.get("resultCount", 0) == 0:
+            return None
+        result = data["results"][0]
+        # Use the highest-res artwork available
+        art_url = result.get("artworkUrl100")
+        if art_url:
+            art_url = art_url.replace("100x100bb", "600x600bb")
+            img_resp = requests.get(art_url, timeout=10)
+            if img_resp.status_code == 200:
+                return img_resp.content
+    except Exception:
+        traceback.print_exc()
+    return None
+
+
+def is_fuzzy_match(expected, actual, threshold=80):
+    """Fuzzy match helper for a single metadata field."""
+    if not expected or not actual:
+        return False
+    return fuzz.token_set_ratio(expected.lower(), actual.lower()) >= threshold
+
+
+def is_metadata_match(expected_artist, expected_album, expected_title, found_artist, found_album, found_title, threshold=80):
+    """Fuzzy match across all fields; the album field is optional."""
+    artist_match = is_fuzzy_match(expected_artist, found_artist, threshold)
+    album_match = is_fuzzy_match(expected_album, found_album, threshold) if expected_album else True
+    title_match = is_fuzzy_match(expected_title, found_title, threshold)
+    return artist_match and album_match and title_match
+
+
+def normalize_name(name):
+    """Normalize artist/album names for searching: lowercase, collapse whitespace,
+    drop a trailing "(1)"/"(2)" disambiguator, and remove common punctuation."""
+    name = name.lower().strip()
+    name = re.sub(r"\([0-9]+\)$", "", name)  # remove (1), (2), etc. at end
+    name = re.sub(r"[\s_]+", " ", name)
+    name = re.sub(r"[\(\)\[\]\{\}\'\"\!\?\.,:;`~@#$%^&*+=|\\/<>]", "", name)
+    return name.strip()
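+
+# Quick sanity checks for the helpers above. Expected values are derived by
+# hand from the regexes, not from a recorded run; adjust if the patterns change.
+assert strip_album_tags("Abbey Road (Remastered)") == "Abbey Road"
+assert strip_album_suffix("Currents EP") == "Currents"
+assert normalize_name("AC/DC") == "acdc"
+assert is_fuzzy_match("Abbey Road", "Abbey Road (Remastered)")  # subset tokens score 100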
+
+# Suppress noisy loggers (aiohttp, urllib3, etc.)
+for noisy_logger in [
+    "aiohttp.client",
+    "aiohttp.server",
+    "aiohttp.access",
+    "urllib3",
+    "asyncio",
+    "chardet",
+    "requests.packages.urllib3",
+]:
+    logging.getLogger(noisy_logger).setLevel(logging.CRITICAL)
+    logging.getLogger(noisy_logger).propagate = False
+
+# Also raise the root logger to CRITICAL for anything not our own
+logging.getLogger().setLevel(logging.CRITICAL)
+
+
+# Directory to scan
+MUSIC_DIR = "/storage/music2/completed/FLAC/review"
+REPORT_CSV = "cover_art_report.csv"
+AUDIO_EXTS = {".flac", ".mp3", ".m4a"}
+
+console = Console()
+
+# ---- MusicBrainz API helpers ----
+
+# Serialize MusicBrainz requests. The blocking lookups below are run in worker
+# threads via asyncio.to_thread, so this must be a threading primitive, not an
+# asyncio.Semaphore (which cannot be acquired from synchronous code).
+MUSICBRAINZ_SEMAPHORE = threading.Semaphore(1)
+
+def search_musicbrainz_cover(artist, album, max_retries=4):
+    """Blocking lookup: MusicBrainz release-group search, then Cover Art Archive."""
+    url = f"https://musicbrainz.org/ws/2/release-group/?query=artist:{artist} AND release:{album}&fmt=json"
+    headers = {"User-Agent": "cover-art-script/1.0"}
+    delay = 1.5
+    for attempt in range(1, max_retries + 1):
+        # Hold the semaphore for the whole attempt so requests stay serialized
+        with MUSICBRAINZ_SEMAPHORE:
+            resp = requests.get(url, headers=headers, timeout=10)
+            if resp.status_code == 503:
+                console.print(f"[yellow]MusicBrainz 503 error, retrying (attempt {attempt})...[/yellow]")
+                time.sleep(delay + random.uniform(0, 0.5))
+                delay *= 2
+                continue
+            if resp.status_code != 200:
+                console.print(f"[red]MusicBrainz API error: {resp.status_code}[/red]")
+                return None
+            try:
+                data = resp.json()
+            except Exception as e:
+                console.print(f"[red]MusicBrainz API returned invalid JSON for {artist} - {album}: {e}[/red]")
+                return None
+            if not data.get("release-groups"):
+                console.print(f"[red]No release-groups found for {artist} - {album}[/red]")
+                return None
+            rgid = data["release-groups"][0]["id"]
+            caa_url = f"https://coverartarchive.org/release-group/{rgid}/front-500"
+            caa_resp = requests.get(caa_url, timeout=10)
+            if caa_resp.status_code == 200:
+                console.print(f"[green]Found cover art on Cover Art Archive for {artist} - {album}[/green]")
+                return caa_resp.content
+            console.print(f"[red]No cover art found on Cover Art Archive for {artist} - {album}[/red]")
+            return None
+    console.print(f"[red]MusicBrainz API failed after {max_retries} attempts for {artist} - {album}[/red]")
+    return None
+
+
+async def fetch_srutil_cover(sr, artist, album_name):
+    """Primary lookup: resolve the album via SRUtil, then fetch its cover URL."""
+    try:
+        album = await sr.get_album_by_name(artist, album_name)
+        if not album or not album.get('id'):
+            return None
+        cover_url = await sr.get_cover_by_album_id(album['id'], 640)
+        if cover_url:
+            # requests is blocking; run it off the event loop
+            resp = await asyncio.to_thread(requests.get, cover_url, timeout=10)
+            if resp.status_code == 200:
+                return resp.content
+            console.print(f"[red]SRUtil: Failed to fetch cover art from URL (status {resp.status_code}): {cover_url}[/red]")
+    except Exception as e:
+        msg = str(e)
+        if "Cannot combine AUTHORIZATION header with AUTH argument" in msg:
+            console.print("[red]SRUtil: Skipping due to conflicting authentication method in dependency (AUTHORIZATION header + AUTH argument).[/red]")
+        else:
+            console.print(f"[red]SRUtil: Exception: {e}[/red]")
+    return None
+
+
+def has_cover(file):
+    try:
+        f = load_file(file)
+        return bool(f['artwork'].first)
+    except Exception as e:
+        console.print(f"[red]Error checking cover art for {file}: {e}[/red]")
+        return False
+
+
+def embed_cover(file, image_bytes):
+    try:
+        f = load_file(file)
+        f['artwork'] = image_bytes
+        f.save()
+        return True
+    except Exception as e:
+        console.print(f"[red]Failed to embed cover art into {file}: {e}[/red]")
+        return False
+
+
+def get_artist_album_title(file):
+    try:
+        f = load_file(file)
+        artist = f['artist'].value or ""
+        album = f['album'].value or ""
+        title = f['title'].value or os.path.splitext(os.path.basename(file))[0]
+        return artist, album, title
+    except Exception as e:
+        console.print(f"[red]Error reading tags for {file}: {e}[/red]")
+        return "", "", os.path.splitext(os.path.basename(file))[0]
+
+
+# Concurrency limit for async processing
+CONCURRENCY = 12
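+
+# Lookup strategy for each file, cheapest-first and cached:
+#   1. SRUtil (Tidal), then MusicBrainz/Cover Art Archive, then iTunes.
+#   2. Retry the same chain with a trailing parenthetical tag stripped,
+#      then with a trailing EP/LP/etc. suffix stripped.
+# Successful fetches are cached per (artist, album-variant) key on the function
+# object itself, so the remaining tracks of an album reuse the first hit
+# instead of re-querying the network.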
+
+async def process_file(file, sr, table, results, sem):
+    async with sem:
+        if has_cover(file):
+            table.add_row(file, "Already Present", "-")
+            results.append([file, "Already Present", "-"])
+            return
+        artist, album, title = get_artist_album_title(file)
+        # Album art cache lives on the function object, shared across tasks
+        if not hasattr(process_file, "album_art_cache"):
+            process_file.album_art_cache = {}
+        album_key = (artist, album)
+        image_bytes = process_file.album_art_cache.get(album_key)
+        source = "SRUtil"
+        if image_bytes is None:
+            image_bytes = await fetch_srutil_cover(sr, artist, album)
+            if image_bytes:
+                process_file.album_art_cache[album_key] = image_bytes
+        if not image_bytes:
+            # blocking lookups run in worker threads to keep the loop responsive
+            image_bytes = await asyncio.to_thread(search_musicbrainz_cover, normalize_name(artist), normalize_name(album))
+            source = "MusicBrainz"
+            if image_bytes:
+                process_file.album_art_cache[album_key] = image_bytes
+        if not image_bytes:
+            image_bytes = await asyncio.to_thread(search_itunes_cover, artist, album)
+            source = "iTunes"
+            if image_bytes:
+                process_file.album_art_cache[album_key] = image_bytes
+        # If all lookups failed, try with the parenthetical tag stripped
+        if not image_bytes and re.search(r"\([^)]*\)$", album):
+            cleaned_album = strip_album_tags(album)
+            if cleaned_album and cleaned_album != album:
+                cleaned_key = (artist, cleaned_album)
+                image_bytes = process_file.album_art_cache.get(cleaned_key)
+                if image_bytes is None:
+                    image_bytes = await fetch_srutil_cover(sr, artist, cleaned_album)
+                    if image_bytes:
+                        process_file.album_art_cache[cleaned_key] = image_bytes
+                if not image_bytes:
+                    image_bytes = await asyncio.to_thread(search_musicbrainz_cover, normalize_name(artist), normalize_name(cleaned_album))
+                    source = "MusicBrainz (stripped)"
+                    if image_bytes:
+                        process_file.album_art_cache[cleaned_key] = image_bytes
+                if not image_bytes:
+                    image_bytes = await asyncio.to_thread(search_itunes_cover, artist, cleaned_album)
+                    source = "iTunes (stripped)"
+                    if image_bytes:
+                        process_file.album_art_cache[cleaned_key] = image_bytes
+
+        # If still not found, try with common suffixes (EP, LP, etc.) stripped from the album name
+        if not image_bytes:
+            suffix_stripped_album = strip_album_suffix(album)
+            if suffix_stripped_album and suffix_stripped_album != album:
+                suffix_key = (artist, suffix_stripped_album)
+                image_bytes = process_file.album_art_cache.get(suffix_key)
+                if image_bytes is None:
+                    image_bytes = await fetch_srutil_cover(sr, artist, suffix_stripped_album)
+                    if image_bytes:
+                        process_file.album_art_cache[suffix_key] = image_bytes
+                if not image_bytes:
+                    image_bytes = await asyncio.to_thread(search_musicbrainz_cover, normalize_name(artist), normalize_name(suffix_stripped_album))
+                    source = "MusicBrainz (suffix-stripped)"
+                    if image_bytes:
+                        process_file.album_art_cache[suffix_key] = image_bytes
+                if not image_bytes:
+                    image_bytes = await asyncio.to_thread(search_itunes_cover, artist, suffix_stripped_album)
+                    source = "iTunes (suffix-stripped)"
+                    if image_bytes:
+                        process_file.album_art_cache[suffix_key] = image_bytes
+        if isinstance(image_bytes, bytes):
+            ok = embed_cover(file, image_bytes)
+            status = "Embedded" if ok else "Failed to Embed"
+            if ok:
+                console.print(f"[green]Embedded cover art from {source}:[/green] {file}")
+            else:
+                console.print(f"[red]Failed to embed cover art ({source}):[/red] {file}")
+        elif image_bytes:
+            status = "Failed to Embed (not bytes)"
+            console.print(f"[red]Failed to embed cover art (not bytes) ({source}):[/red] {file}")
+        else:
+            status = "Not Found"
+            source = "-"
+            console.print(f"[red]No cover art found:[/red] {file}")
+        table.add_row(file, status, source)
+        results.append([file, status, source])
+
+
+async def main():
+    console.print(f"[bold blue]Scanning directory: {MUSIC_DIR}[/bold blue]")
+    sr = SRUtil()
+    results = []
+    files = []
+    for root, _, filenames in os.walk(MUSIC_DIR):
+        for fn in filenames:
+            if os.path.splitext(fn)[1].lower() in AUDIO_EXTS:
+                files.append(os.path.join(root, fn))
+
+    table = Table(title="Cover Art Embedding Report")
+    table.add_column("File", style="cyan", overflow="fold")
+    table.add_column("Status", style="green")
+    table.add_column("Source", style="magenta")
+
+    sem = asyncio.Semaphore(CONCURRENCY)
+
+    async def worker(file, sr, table, results, sem, progress, task_id):
+        await process_file(file, sr, table, results, sem)
+        progress.update(task_id, advance=1)
+
+    with Progress(
+        TextColumn("[progress.description]{task.description}"),
+        BarColumn(),
+        TaskProgressColumn(),
+        TimeElapsedColumn(),
+    ) as progress:
+        task_id = progress.add_task("Processing files...", total=len(files))
+        # Schedule all workers; the semaphore caps how many run at once
+        await asyncio.gather(*(worker(file, sr, table, results, sem, progress, task_id) for file in files))
+
+    # Print the summary table and CSV after the progress bar finishes
+    console.print(table)
+    with open(REPORT_CSV, "w", newline="") as f:
+        writer = csv.writer(f)
+        writer.writerow(["File", "Status", "Source"])
+        writer.writerows(results)
+    console.print(f"[bold green]CSV report written to {REPORT_CSV}[/bold green]")
+
+if __name__ == "__main__":
+    asyncio.run(main())
\ No newline at end of file
diff --git a/test/test_search_track.py b/test/test_search_track.py
new file mode 100644
index 0000000..516f60b
--- /dev/null
+++ b/test/test_search_track.py
@@ -0,0 +1,23 @@
+import asyncio
+import logging
+import sys
+sys.path.insert(0, "..")
+from utils.sr_wrapper import SRUtil
+
+# logging.getLogger("sr_wrapper").propagate = False
+logger = logging.getLogger()
+logger.setLevel(logging.CRITICAL)
+
+async def main():
+    sr = SRUtil()
+    artist, album = "Kadavar - The Sacrament Of Sin".split(" - ")
+    search_res = await sr.get_album_by_name(artist[:8], album)
+    logging.critical("Search result: %s", search_res)
+    if not search_res:
+        # get_album_by_name returns None when no fuzzy match clears the threshold
+        logging.critical("No album match found")
+        return
+    album = search_res
+    _cover = await sr.get_cover_by_album_id(album.get('id'), 640)
+    # cover = sr._get_tidal_cover_url(album.get('cover'), 640)
+    logging.critical("Result: %s, Cover: %s", album, _cover)
+
+
+asyncio.run(main())
\ No newline at end of file
diff --git a/utils/rip_background.py b/utils/rip_background.py
index 4321f5c..229eea6 100644
--- a/utils/rip_background.py
+++ b/utils/rip_background.py
@@ -7,20 +7,23 @@ import traceback
 import uuid
 import subprocess
 import shutil
-import re
 from pathlib import Path
+from typing import Optional
 from urllib.parse import urlparse, unquote
 
 import aiohttp
-from datetime import datetime
+from datetime import datetime, timezone
 from mediafile import MediaFile  # type: ignore[import]
 from rq import get_current_job
 
 from utils.sr_wrapper import SRUtil
+from dotenv import load_dotenv
+import re
+
+# Load .env before any os.getenv() lookups below
+load_dotenv()
 
 # ---------- Config ----------
 ROOT_DIR = Path("/storage/music2")
 MAX_RETRIES = 5
 THROTTLE_MIN = 1.0
 THROTTLE_MAX = 3.5
+DISCORD_WEBHOOK = os.getenv("TRIP_WEBHOOK_URI", "").strip()
 
 HEADERS = {
     "User-Agent": (
@@ -38,26 +41,71 @@ logging.basicConfig(
     format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
 )
 
 sr = SRUtil()
 
+
+# ---------- Discord helper ----------
+async def discord_notify(webhook_url: str, title: str, description: str, target: Optional[str] = None, color: int = 0x00FF00):
+    embed = {
+        "title": title,
+        "description": description[:1900] if description else "",
+        "color": color,
+        "timestamp": datetime.now(timezone.utc).isoformat(),
+    }
+    if target:
+        embed["fields"] = [{"name": "Target", "value": str(target), "inline": True}]
+
+    payload = {
+        "embeds": [embed],
+    }
+
+    while True:  # permanent retry; the surrounding job owns any timeout
+        try:
+            async with aiohttp.ClientSession() as session:
+                async with session.post(webhook_url, json=payload, timeout=aiohttp.ClientTimeout(total=10)) as resp:
+                    if resp.status >= 400:
+                        text = await resp.text()
+                        raise RuntimeError(f"Discord webhook failed ({resp.status}): {text}")
+            break
+        except Exception as e:
+            print(f"Discord send failed, retrying: {e}")
+            await asyncio.sleep(5)
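+
+# Example of the embed payload this produces (illustrative values):
+#   {"embeds": [{"title": "Job Started: 1a2b3c", "description": "Processing `4` track(s)",
+#                "color": 65535, "timestamp": "2025-09-18T12:13:21+00:00",
+#                "fields": [{"name": "Target", "value": "<target>", "inline": true}]}]}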
+
+def send_log_to_discord(message: str, level: str, target: Optional[str] = None):
+    colors = {"WARNING": 0xFFA500, "ERROR": 0xFF0000, "CRITICAL": 0xFF0000}
+    color = colors.get(level.upper(), 0xFFFF00)
+
+    async def _send():
+        await discord_notify(
+            webhook_url=DISCORD_WEBHOOK,
+            title=f"{level} in bulk_download",
+            description=message,
+            target=target,
+            color=color
+        )
+
+    try:
+        asyncio.get_running_loop()
+        # already in an event loop; schedule a fire-and-forget task
+        asyncio.create_task(_send())
+    except RuntimeError:
+        # not in an event loop; safe to run synchronously
+        asyncio.run(_send())
+
 
 # ---------- Helpers ----------
-
-
 def tag_with_mediafile(file_path: str, meta: dict):
     f = MediaFile(file_path)
-
-    # --- Helper to safely set textual/number fields ---
     def safe_set(attr, value, default=None, cast=None):
         if value is None:
             value = default
         if value is not None:
-            if cast is not None:
+            if cast:
                 setattr(f, attr, cast(value))
             else:
                 setattr(f, attr, str(value))
-
-    # --- Basic textual metadata ---
     safe_set("title", meta.get("title"), default="Unknown Title")
     safe_set("artist", meta.get("artist"), default="Unknown Artist")
     safe_set("albumartist", meta.get("album_artist"), default="Unknown Artist")
@@ -66,8 +114,6 @@ def tag_with_mediafile(file_path: str, meta: dict):
     safe_set("disc", meta.get("disc_number"), default=0, cast=int)
     safe_set("isrc", meta.get("isrc"), default="")
     safe_set("bpm", meta.get("bpm"), default=0, cast=int)
-
-    # --- Release date ---
     release_date_str = meta.get("release_date")
     release_date_obj = None
     if release_date_str:
@@ -75,34 +121,15 @@ def tag_with_mediafile(file_path: str, meta: dict):
             release_date_obj = datetime.fromisoformat(release_date_str).date()
         except ValueError:
             try:
-                # fallback if only year string
                 release_date_obj = datetime(int(release_date_str[:4]), 1, 1).date()
             except Exception:
                 pass
     if release_date_obj:
         f.date = release_date_obj
-
-    # --- Save all tags ---
     f.save()
 
 
-def cleanup_empty_dirs(root: Path):
-    """
-    Recursively remove any directories under root that contain no files
-    (empty or only empty subdirectories).
-    """
-    for dirpath, dirnames, filenames in os.walk(root, topdown=False):
-        p = Path(dirpath)
-        has_file = any(f.is_file() for f in p.rglob("*"))
-        if not has_file:
-            try:
-                p.rmdir()
-            except Exception:
-                pass
-
-
 def sanitize_filename(name: str) -> str:
-    """Make a string safe for file/dir names."""
     if not name:
         return "Unknown"
     name = name.replace("/", "-").replace("\\", "-")
@@ -113,18 +140,12 @@ def sanitize_filename(name: str) -> str:
 
 
 def ensure_unique_path(p: Path) -> Path:
-    """
-    Ensure the given file or directory path is unique *within its parent folder*.
-    Only appends (2), (3)... if a real conflict exists in that folder.
-    """
     parent = p.parent
     stem, suffix = p.stem, p.suffix
     existing = {f.name for f in parent.glob(f"*{suffix}") if f.is_file()}
-
     candidate = f"{stem}{suffix}"
     if candidate not in existing:
         return parent / candidate
-
     counter = 2
     while True:
         candidate = f"{stem} ({counter}){suffix}"
@@ -133,7 +154,7 @@ def ensure_unique_path(p: Path) -> Path:
         counter += 1
 
 
-# ---------- Job ----------
+# ---------- bulk_download ----------
 def bulk_download(track_list: list, quality: str = "FLAC"):
     """
     RQ job:
@@ -142,9 +163,11 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
     - uses SR metadata to name/organize files
     - creates ONE tarball for all tracks
     - returns [tarball_path]
+    - sends relevant messages to Discord
     """
     job = get_current_job()
     job_id = job.id if job else uuid.uuid4().hex
+    target = job.meta.get("target") if job else None
     staging_root = ROOT_DIR / job_id
 
     if job:
@@ -156,30 +179,27 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
             job.meta["status"] = "Started"
             job.save_meta()
         except Exception as e:
-            logging.warning("Failed to init job.meta: %s", e)
+            send_log_to_discord(f"Failed to init job.meta: {e}", "WARNING", target)
+
+    # Job started Discord message
+    asyncio.run(discord_notify(
+        DISCORD_WEBHOOK,
+        title=f"Job Started: {job_id}",
+        description=f"Processing `{len(track_list)}` track(s)",
+        target=target,
+        color=0x00FFFF
+    ))
 
     async def process_tracks():
         per_track_meta = []
         all_final_files = []
         all_artists = set()
-        (ROOT_DIR / "completed").mkdir(parents=True, exist_ok=True)
 
         async with aiohttp.ClientSession(headers=HEADERS) as session:
             total = len(track_list or [])
-            logging.critical("Total tracks to process: %s", total)
-            if job:
-                job.meta["progress"] = 0
-                job.save_meta()
-
             for i, track_id in enumerate(track_list or []):
-                track_info = {
-                    "track_id": str(track_id),
-                    "status": "Pending",
-                    "file_path": None,
-                    "error": None,
-                    "attempts": 0,
-                }
+                track_info = {"track_id": str(track_id), "status": "Pending", "file_path": None, "error": None, "attempts": 0}
 
                 attempt = 0
                 while attempt < MAX_RETRIES:
@@ -195,7 +215,6 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
                         parsed = urlparse(url)
                         clean_path = unquote(parsed.path)
                         ext = Path(clean_path).suffix or ".mp3"
-
                         tmp_file = Path(f"/tmp/{uuid.uuid4().hex}{ext}")
 
                         async with session.get(url) as resp:
@@ -205,7 +224,6 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
                                 f.write(chunk)
 
                         md = await sr.get_metadata_by_track_id(track_id) or {}
-                        logging.info("Metadata for %s: %s", track_id, md)
                         artist_raw = md.get("artist") or "Unknown Artist"
                         album_raw = md.get("album") or "Unknown Album"
                         title_raw = md.get("title") or f"Track {track_id}"
@@ -215,14 +233,11 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
                         title = sanitize_filename(title_raw)
 
                         all_artists.add(artist)
-
-                        artist_dir = staging_root / artist
-                        album_dir = artist_dir / album
+                        album_dir = staging_root / artist / album
                         album_dir.mkdir(parents=True, exist_ok=True)
-
                         final_file = ensure_unique_path(album_dir / f"{title}{ext}")
-                        tag_with_mediafile(str(tmp_file), md)
+                        tag_with_mediafile(str(tmp_file), md)
                         tmp_file.rename(final_file)
                         tmp_file = None
 
@@ -233,60 +248,48 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
                         if job:
                             job.meta["progress"] = int(((i + 1) / total) * 100)
+                            job.meta["tracks"] = per_track_meta + [track_info]
                             job.save_meta()
                         break
 
                     except aiohttp.ClientResponseError as e:
+                        msg = f"Track {track_id} attempt {attempt} ClientResponseError: {e}"
+                        send_log_to_discord(msg, "WARNING", target)
                         if e.status == 429:
-                            wait_time = min(60, 2**attempt)  # exponential up to 60s
-                            logging.warning(
-                                "Rate limited (429). Sleeping %s seconds", wait_time
-                            )
+                            wait_time = min(60, 2**attempt)  # exponential backoff, capped at 60s
                             await asyncio.sleep(wait_time)
                         else:
-                            await asyncio.sleep(
-                                random.uniform(THROTTLE_MIN, THROTTLE_MAX)
-                            )
+                            await asyncio.sleep(random.uniform(THROTTLE_MIN, THROTTLE_MAX))
                     except Exception as e:
-                        logging.error(
-                            "Track %s attempt %s failed: %s", track_id, attempt, e
-                        )
-                        traceback.print_exc()
+                        tb = traceback.format_exc()
+                        msg = f"Track {track_id} attempt {attempt} failed: {e}\n{tb}"
+                        send_log_to_discord(msg, "ERROR", target)
                        track_info["error"] = str(e)
                         if attempt >= MAX_RETRIES:
                             track_info["status"] = "Failed"
+                            send_log_to_discord(f"Track {track_id} failed after {attempt} attempts", "ERROR", target)
                         await asyncio.sleep(random.uniform(THROTTLE_MIN, THROTTLE_MAX))
+
                     finally:
                         try:
                             if tmp_file and tmp_file.exists():
-                                tmp_file.unlink()
+                                os.remove(tmp_file)
                         except Exception:
                             pass
 
                 per_track_meta.append(track_info)
-                if job:
-                    try:
-                        job.meta["tracks"] = per_track_meta
-                        job.save_meta()
-                    except Exception as e:
-                        logging.warning(
-                            "Failed to update job.meta after track %s: %s", track_id, e
-                        )
-
-                await asyncio.sleep(random.uniform(THROTTLE_MIN, THROTTLE_MAX))
 
         if not all_final_files:
             if job:
-                try:
-                    job.meta["tarball"] = None
-                    job.meta["status"] = "Failed"
-                    job.save_meta()
-                except Exception:
-                    pass
+                job.meta["tarball"] = None
+                job.meta["status"] = "Failed"
+                job.save_meta()
+            send_log_to_discord(f"No tracks were successfully downloaded for job `{job_id}`", "CRITICAL", target)
             return []
 
-        artist_counts: dict[str, int] = {}
+        # Tarball creation: name the archive after the most frequent artist
+        artist_counts = {}
         for t in per_track_meta:
             if t["status"] == "Success" and t.get("file_path"):
                 try:
@@ -294,17 +297,10 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
                 except Exception:
                     artist = "Unknown Artist"
                 artist_counts[artist] = artist_counts.get(artist, 0) + 1
-
-        if artist_counts:
-            top_artist = sorted(artist_counts.items(), key=lambda kv: (-kv[1], kv[0]))[
-                0
-            ][0]
-        else:
-            top_artist = "Unknown Artist"
-
+        top_artist = sorted(artist_counts.items(), key=lambda kv: (-kv[1], kv[0]))[0][0] if artist_counts else "Unknown Artist"
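+        # e.g. {"Kadavar": 3, "Foo": 3} sorts to [("Foo", 3), ("Kadavar", 3)]:
+        # highest count first, ties broken alphabetically (hand-worked example)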
         combined_artist = sanitize_filename(top_artist)
         staged_tarball = staging_root / f"{combined_artist}.tar.gz"
-        # Ensure uniqueness (Windows-style padding) within the parent folder
+
         counter = 1
         base_name = staged_tarball.stem
         while staged_tarball.exists():
@@ -315,69 +311,52 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
         final_tarball.parent.mkdir(parents=True, exist_ok=True)
 
         if job:
-            try:
-                job.meta["status"] = "Compressing"
-                job.save_meta()
-            except Exception:
-                pass
+            job.meta["status"] = "Compressing"
+            job.save_meta()
 
         logging.info("Creating tarball: %s", staged_tarball)
-
-        def _create_tar_sync():
-            try:
-                subprocess.run(
-                    [
-                        "tar",
-                        "-I",
-                        "pigz -9",
-                        "-cf",
-                        str(staged_tarball),
-                        "-C",
-                        str(staging_root),
-                    ]
-                    + [str(f.relative_to(staging_root)) for f in all_final_files],
-                    check=True,
-                )
-                for f in all_final_files:
-                    try:
-                        os.remove(f)
-                    except Exception:
-                        pass
-            except FileNotFoundError:
-                logging.warning("pigz not available, falling back to tarfile (slower).")
-                with tarfile.open(staged_tarball, "w:gz") as tar:
-                    for f in all_final_files:
-                        try:
-                            arcname = f.relative_to(staging_root)
-                        except ValueError:
-                            arcname = f.name
-                        tar.add(f, arcname=str(arcname))
-                        try:
-                            os.remove(f)
-                        except Exception:
-                            pass
-
-        await asyncio.to_thread(_create_tar_sync)
+        await discord_notify(DISCORD_WEBHOOK,
+                             title=f"Compressing: Job {job_id}",
+                             description=f"Creating tarball (`{len(track_list)}` track(s)).\nStaging path: {staged_tarball}",
+                             color=0xFFA500,
+                             target=target)
+        try:
+            subprocess.run(
+                ["tar", "-I", "pigz -9", "-cf", str(staged_tarball), "-C", str(staging_root)]
+                + [str(f.relative_to(staging_root)) for f in all_final_files],
+                check=True,
+            )
+            for f in all_final_files:
+                try:
+                    os.remove(f)
+                except Exception:
+                    pass
+        except FileNotFoundError:
+            send_log_to_discord("pigz not available, falling back to tarfile (slower).", "WARNING", target)
+            with tarfile.open(staged_tarball, "w:gz") as tar:
+                for f in all_final_files:
+                    try:
+                        arcname = f.relative_to(staging_root)
+                    except ValueError:
+                        arcname = f.name
+                    tar.add(f, arcname=str(arcname))
+                    try:
+                        os.remove(f)
+                    except Exception:
+                        pass
 
         if not staged_tarball.exists():
-            logging.error("Tarball was not created: %s", staged_tarball)
+            send_log_to_discord(f"Tarball was not created: `{staged_tarball}`", "CRITICAL", target)
             if job:
-                try:
-                    job.meta["status"] = "compress_failed"
-                    job.save_meta()
-                except Exception:
-                    pass
+                job.meta["status"] = "compress_failed"
+                job.save_meta()
             return []
 
-        logging.critical("Tarball created: %s", staged_tarball)
-
         try:
             staged_tarball.rename(final_tarball)
         except Exception:
             shutil.move(str(staged_tarball), str(final_tarball))
 
-        logging.critical("Tarball finalized: %s", final_tarball)
-
         await asyncio.to_thread(shutil.rmtree, staging_root, ignore_errors=True)
 
         if job:
@@ -386,6 +365,15 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
             job.meta["status"] = "Completed"
             job.save_meta()
 
+        # Job completed Discord message
+        await discord_notify(
+            DISCORD_WEBHOOK,
+            title=f"Job Completed: {job_id}",
+            description=f"Processed `{len(track_list)}` track(s). Tarball: `{final_tarball}`",
+            target=target,
+            color=0x00FF00
+        )
+
         return [str(final_tarball)]
 
     loop = asyncio.new_event_loop()
@@ -393,9 +381,9 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
     try:
         return loop.run_until_complete(process_tracks())
     except Exception as e:
+        send_log_to_discord(f"bulk_download failed: {e}\n{traceback.format_exc()}", "CRITICAL", target)
         if job:
             job.meta["status"] = "Failed"
             job.save_meta()
-        logging.critical("Exception: %s", str(e))
     finally:
         loop.close()
diff --git a/utils/sr_wrapper.py b/utils/sr_wrapper.py
index 8054c14..5f42431 100644
--- a/utils/sr_wrapper.py
+++ b/utils/sr_wrapper.py
@@ -2,7 +2,17 @@ from typing import Optional, Any
 from uuid import uuid4
 from urllib.parse import urlparse
 import hashlib
+import traceback
 import logging
+
+# Suppress all logging output from this module and its children
+for name in [__name__, "utils.sr_wrapper"]:
+    logger = logging.getLogger(name)
+    logger.setLevel(logging.CRITICAL)
+    logger.propagate = False
+    for handler in logger.handlers:
+        handler.setLevel(logging.CRITICAL)
+# Also set the root logger to CRITICAL as a last resort (may affect global logging)
+logging.getLogger().setLevel(logging.CRITICAL)
 import random
 import asyncio
 import os
@@ -11,6 +21,8 @@ import time
 from streamrip.client import TidalClient  # type: ignore
 from streamrip.config import Config as StreamripConfig  # type: ignore
 from dotenv import load_dotenv
+from rapidfuzz import fuzz
+
 
 load_dotenv()
 
@@ -62,7 +74,18 @@ class SRUtil:
             await asyncio.sleep(self.METADATA_RATE_LIMIT - elapsed)
         result = await func(*args, **kwargs)
         self.last_request_time = time.time()
-        return result
+        return result
+
+    def is_fuzzy_match(self, expected, actual, threshold=80):
+        if not expected or not actual:
+            return False
+        return fuzz.token_set_ratio(expected.lower(), actual.lower()) >= threshold
+
+    def is_metadata_match(self, expected_artist, expected_album, expected_title, found_artist, found_album, found_title, threshold=80):
+        artist_match = self.is_fuzzy_match(expected_artist, found_artist, threshold)
+        album_match = self.is_fuzzy_match(expected_album, found_album, threshold) if expected_album else True
+        title_match = self.is_fuzzy_match(expected_title, found_title, threshold)
+        return artist_match and album_match and title_match
 
     def dedupe_by_key(self, key: str, entries: list[dict]) -> list[dict]:
         deduped = {}
@@ -77,6 +100,23 @@ class SRUtil:
             return None
         m, s = divmod(seconds, 60)
         return f"{m}:{s:02}"
+
+    def _get_tidal_cover_url(self, uuid, size):
+        """Generate a Tidal cover URL.
+
+        :param uuid: valid cover UUID string
+        :param size: edge length in pixels; must be one of the sizes below
+        """
+        TIDAL_COVER_URL = "https://resources.tidal.com/images/{uuid}/{width}x{height}.jpg"
+        possibles = (80, 160, 320, 640, 1280)
+        assert size in possibles, f"size must be in {possibles}"
+        return TIDAL_COVER_URL.format(
+            uuid=uuid.replace("-", "/"),
+            height=size,
+            width=size,
+        )
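+
+    # Illustrative example (hypothetical UUID): with size=640,
+    # "0847e12b-2c79-4ecd-af32-9639e3e0a1b8" maps to
+    # https://resources.tidal.com/images/0847e12b/2c79/4ecd/af32/9639e3e0a1b8/640x640.jpg
+    # (every "-" in the UUID becomes a "/" path segment).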
+
+
     def combine_album_track_metadata(
         self, album_json: dict | None, track_json: dict
@@ -140,32 +180,33 @@ class SRUtil:
         ]
 
     async def get_artists_by_name(self, artist_name: str) -> Optional[list]:
-        """Get artist(s) by name.
-        Args:
-            artist_name (str): The name of the artist.
-        Returns:
-            Optional[dict]: The artist details or None if not found.
-        """
-
-        try:
-            await self.streamrip_client.login()
-        except Exception as e:
-            logging.info("Login Exception: %s", str(e))
-            pass
+        """Get artist(s) by name. Retry login only on authentication failure;
+        back off and retry on 400/429 responses."""
         artists_out: list[dict] = []
+        max_retries = 4
+        delay = 1.0
+        for attempt in range(max_retries):
+            try:
+                artists = await self.streamrip_client.search(
+                    media_type="artist", query=artist_name
+                )
+                break
+            except AttributeError:
+                # client not logged in yet
+                await self.streamrip_client.login()
+                if attempt == max_retries - 1:
+                    return None
+            except Exception as e:
+                msg = str(e)
+                if ("400" in msg or "429" in msg) and attempt < max_retries - 1:
+                    await asyncio.sleep(delay)
+                    delay *= 2
+                    continue
+                return None
+        else:
+            return None
         artists = artists[0].get("items", [])
         if not artists:
             return None
         artists_out = [
             {
@@ -179,26 +220,33 @@ class SRUtil:
         return artists_out
 
     async def get_albums_by_artist_id(self, artist_id: int) -> Optional[list | dict]:
-        """Get albums by artist ID
-        Args:
-            artist_id (int): The ID of the artist.
-        Returns:
-            Optional[list[dict]]: List of albums or None if not found.
-        """
+        """Get albums by artist ID. Retry login only on authentication failure;
+        back off and retry on 400/429 responses."""
         artist_id_str: str = str(artist_id)
         albums_out: list[dict] = []
-        try:
-            await self.streamrip_client.login()
-            metadata = await self.streamrip_client.get_metadata(
-                item_id=artist_id_str, media_type="artist"
-            )
-        except AttributeError:
-            await self.streamrip_client.login()
-            metadata = await self.streamrip_client.get_metadata(
-                item_id=artist_id_str, media_type="artist"
-            )
+        max_retries = 4
+        delay = 1.0
+        for attempt in range(max_retries):
+            try:
+                metadata = await self.streamrip_client.get_metadata(
+                    item_id=artist_id_str, media_type="artist"
+                )
+                break
+            except AttributeError:
+                await self.streamrip_client.login()
+                if attempt == max_retries - 1:
+                    return None
+            except Exception as e:
+                msg = str(e)
+                if ("400" in msg or "429" in msg) and attempt < max_retries - 1:
+                    await asyncio.sleep(delay)
+                    delay *= 2
+                    continue
+                return None
+        else:
+            return None
         if not metadata:
             return None
         albums = self.dedupe_by_key("title", metadata.get("albums", []))
         albums_out = [
@@ -211,9 +259,65 @@ class SRUtil:
             for album in albums
             if "title" in album and "id" in album and "artists" in album
         ]
-
-        logging.debug("Retrieved albums: %s", albums_out)
         return albums_out
+
+    async def get_album_by_name(self, artist: str, album: str) -> Optional[dict]:
+        """Get an album by artist and album name via artist ID plus fuzzy
+        matching. Searches with the first 8 characters of the artist name,
+        then the first 12 if nothing matched."""
+        for trunc in (8, 12):
+            search_artist = artist[:trunc]
+            artists = await self.get_artists_by_name(search_artist)
+            if not artists:
+                continue
+            best_artist = None
+            best_artist_score = 0
+            for a in artists:
+                score = fuzz.token_set_ratio(artist, a["artist"])
+                if score > best_artist_score:
+                    best_artist = a
+                    best_artist_score = int(score)
+            if not best_artist or best_artist_score < 85:
+                continue
+            artist_id = best_artist["id"]
+            albums = await self.get_albums_by_artist_id(artist_id)
+            if not albums:
+                continue
+            best_album = None
+            best_album_score = 0
+            for alb in albums:
+                score = fuzz.token_set_ratio(album, alb["album"])
+                if score > best_album_score:
+                    best_album = alb
+                    best_album_score = int(score)
+            if best_album and best_album_score >= 85:
+                return best_album
+        return None
+
+    async def get_cover_by_album_id(self, album_id: int, size: int = 640) -> Optional[str]:
+        """Get a cover URL by album ID. Retry login only on authentication failure."""
+        if size not in [80, 160, 320, 640, 1280]:
+            return None
+        album_id_str: str = str(album_id)
+        for attempt in range(2):
+            try:
+                metadata = await self.streamrip_client.get_metadata(
+                    item_id=album_id_str, media_type="album"
+                )
+                break
+            except AttributeError:
+                await self.streamrip_client.login()
+                if attempt == 1:
+                    return None
+        else:
+            return None
+        if not metadata:
+            return None
+        cover_id = metadata.get("cover")
+        if not cover_id:
+            return None
+        return self._get_tidal_cover_url(cover_id, size)
 
     async def get_tracks_by_album_id(
         self, album_id: int, quality: str = "FLAC"
@@ -247,7 +351,7 @@ class SRUtil:
         ]
         return tracks_out
 
-    async def get_tracks_by_artist_song(self, artist: str, song: str) -> Optional[list]:
+    async def get_tracks_by_artist_song(self, artist: str, song: str, n: int = 0) -> Optional[list]:
         """Get track by artist and song name
         Args:
             artist (str): The name of the artist.
@@ -256,7 +360,23 @@ class SRUtil:
         Returns:
             Optional[dict]: The track details or None if not found.
-        TODO: Reimplement using StreamRip
         """
-        return []
+        if not self.streamrip_client.logged_in:
+            await self.streamrip_client.login()
+        try:
+            search_res = await self.streamrip_client.search(
+                media_type="track",
+                query=f"{artist} - {song}",
+            )
+            logging.critical("Result: %s", search_res)
+            return search_res[0].get('items')
+        except Exception as e:
+            traceback.print_exc()
+            logging.critical("Search Exception: %s", str(e))
+            if n < 3:
+                # retry a few times before giving up
+                return await self.get_tracks_by_artist_song(artist, song, n + 1)
+            return []
 
     async def get_stream_url_by_track_id(
         self, track_id: int, quality: str = "FLAC"
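
A minimal usage sketch of the album-art path this patch adds (assumes valid Tidal credentials in .env; the 85-point fuzzy threshold and the dict keys come from get_album_by_name above):

    import asyncio
    from utils.sr_wrapper import SRUtil

    async def demo():
        sr = SRUtil()
        # returns None when no artist/album fuzzy match scores >= 85
        album = await sr.get_album_by_name("Kadavar", "The Sacrament Of Sin")
        if album:
            # resolves the album's cover UUID to a 640x640 image URL
            print(await sr.get_cover_by_album_id(album["id"], 640))

    asyncio.run(demo())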