diff --git a/.gitignore b/.gitignore
index 567bf3a..4005bca 100644
--- a/.gitignore
+++ b/.gitignore
@@ -29,5 +29,7 @@ up.py
 job_review.py
 check_missing.py
 **/auth/*
+test/db_stats.py
+test/report/*
 .gitignore
 .env
\ No newline at end of file
diff --git a/endpoints/radio.py b/endpoints/radio.py
index fae1703..0a432af 100644
--- a/endpoints/radio.py
+++ b/endpoints/radio.py
@@ -22,7 +22,7 @@ from fastapi import (
     HTTPException,
     Depends)
 from fastapi_throttle import RateLimiter
-from fastapi.responses import RedirectResponse, JSONResponse
+from fastapi.responses import RedirectResponse, JSONResponse, FileResponse
 
 class Radio(FastAPI):
     """Radio Endpoints"""
@@ -273,10 +273,9 @@ class Radio(FastAPI):
            album_art: Optional[bytes] = self.radio_util.get_album_art(
                track_id=track_id
            )
-            if not album_art:
-                return RedirectResponse(
-                    url="https://codey.lol/images/radio_art_default.jpg",
-                    status_code=302,
+            if not album_art:
+                return FileResponse(
+                    path="/var/www/codey.lol/new/public/images/radio_art_default.jpg",
                )
            return Response(content=album_art, media_type="image/png")
        except Exception as e:
diff --git a/endpoints/rip.py b/endpoints/rip.py
index 30d2caf..5360c07 100644
--- a/endpoints/rip.py
+++ b/endpoints/rip.py
@@ -80,6 +80,9 @@ class RIP(FastAPI):
        tracks_in = job.meta.get("tracks_in")
        tracks_out = len(job.meta.get("tracks", []))
+        # `utils/rip_background.py` sets per-track status to 'Success' or 'Failed',
+        # so check for 'success' case-insensitively and count the matches.
+        succeeded_tracks = len([t for t in job.meta.get("tracks", []) if str(t.get("status", "")).lower() == "success"])
        return {
            "id": job.id,
@@ -90,7 +93,7 @@
            "started_at": job.started_at,
            "ended_at": job.ended_at,
            "progress": progress,
-            "tracks": f"{tracks_out} / {tracks_in}"
+            "tracks": f"{succeeded_tracks} / {tracks_in}"
            if isinstance(tracks_in, int)
            else tracks_out,
            "target": job.meta.get("target"),
@@ -101,7 +104,10 @@
        self, artist: str, request: Request, user=Depends(get_current_user)
    ) -> Response:
        """Get artists by name"""
-        artists = await self.trip_util.get_artists_by_name(artist)
+        # Support optional grouping: return one primary result per display name,
+        # with `alternatives` for disambiguation (use ?group=true).
+        # Parse the flag explicitly; bool() on a query string would treat "false" as truthy.
+        group = request.query_params.get("group", "").lower() in ("1", "true", "yes")
+        artists = await self.trip_util.get_artists_by_name(artist, group=group)
        if not artists:
            return Response(status_code=404, content="Not found")
        return JSONResponse(content=artists)
@@ -176,7 +182,7 @@
            job_timeout=14400,
            failure_ttl=86400,
            result_ttl=-1,
-            retry=Retry(max=1, interval=[30]),
+            # retry=Retry(max=1, interval=[30]),
            meta={
                "progress": 0,
                "status": "Queued",
diff --git a/test/add_cover_art.py b/test/add_cover_art.py
index 93808e7..ad6d1bb 100644
--- a/test/add_cover_art.py
+++ b/test/add_cover_art.py
@@ -1,21 +1,276 @@
+# Standard library
 import os
-import csv
-import re
-import time
 import sys
-import random
+import re
+import csv
+import asyncio
+import logging
+import traceback
+from pathlib import Path
-import requests
-from music_tag import load_file
+
+# Third-party
+import aiohttp
+from dotenv import load_dotenv
+from rapidfuzz import fuzz
+from music_tag import load_file  # type: ignore
+from rich.console import Console
+from rich.table import Table
+from rich.progress import Progress, BarColumn, TextColumn, TimeElapsedColumn, TaskProgressColumn
+
+# Local imports
 sys.path.insert(0, "..")
 from utils.sr_wrapper import SRUtil
-from rapidfuzz import fuzz
+import musicbrainzngs  # type: ignore
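+# Editor's aside (illustrative only, not part of the change set): the job-status
+# counting added in endpoints/rip.py above assumes per-track meta entries shaped
+# like {"status": "Success"}; a quick sanity check of that expression:
+#   tracks = [{"status": "Success"}, {"status": "failed"}, {"status": "SUCCESS"}]
+#   assert len([t for t in tracks if str(t.get("status", "")).lower() == "success"]) == 2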
+from discogs_client import Client  # type: ignore
+# typing helper
+from typing import Any, cast, Optional
+# Optional: use the popular `itunespy` PyPI package when available
+try:
+    import itunespy  # type: ignore
+    HAVE_ITUNESPY = True
+except Exception:
+    itunespy = None
+    HAVE_ITUNESPY = False
+
+# Optional: use `spotipy` when available for Spotify lookups
+try:
+    import spotipy  # type: ignore
+    HAVE_SPOTIPY = True
+except Exception:
+    spotipy = None
+    HAVE_SPOTIPY = False
+# Reminder: If you see 'Import "music_tag" could not be resolved', run:
+# uv add music-tag
+
+# Configurable paths and extensions
+MUSIC_DIR = Path("/storage/music2/completed/FLAC/review")
+AUDIO_EXTS = {'.flac', '.mp3', '.m4a', '.ogg', '.wav', '.aac'}
+REPORT_CSV = "cover_art_report.csv"
+ALBUM_ART_CACHE: dict = {}
+async def search_musicbrainz_cover(artist, album, session: aiohttp.ClientSession, limiter: 'AsyncRateLimiter'):
+    # Use musicbrainzngs to search for a release-group matching artist+album
+    try:
+        # search for release-groups in a thread to avoid blocking the event loop
+        query = f"artist:{artist} AND release:{album}"
+        try:
+            res = await asyncio.to_thread(musicbrainzngs.search_release_groups, query, limit=5)
+        except Exception:
+            res = {}
+        rgs = res.get('release-group-list') or []
+        if COVER_DEBUG_QUERIES:
+            try:
+                dbg_info = []
+                for rg in rgs[:3]:
+                    dbg_info.append({
+                        'id': rg.get('id'),
+                        'title': rg.get('title'),
+                        'artist': artist_credit_to_name(rg.get('artist-credit', []))
+                    })
+                console.print(f"[cyan][DEBUG] MusicBrainz top candidates: {dbg_info}[/cyan]")
+            except Exception:
+                pass
+        for rg in rgs:
+            # try to get cover art via Cover Art Archive for releases in the group
+            # check releases for a cover
+            releases = rg.get('release-list') or []
+            for rel in releases:
+                relid = rel.get('id')
+                if relid:
+                    caa_url = f"https://coverartarchive.org/release/{relid}/front-500"
+                    try:
+                        await limiter.acquire()
+                        timeout = aiohttp.ClientTimeout(total=15)
+                        async with session.get(caa_url, timeout=timeout) as resp:
+                            if resp.status == 200:
+                                return await resp.read()
+                    except Exception:
+                        continue
+        return None
+    except Exception as e:
+        console.print(f"[red]MusicBrainz search exception: {e}[/red]")
+        return None
+
+async def search_discogs_cover(artist, album, session: aiohttp.ClientSession, limiter: 'AsyncRateLimiter'):
+    # Use discogs_client to search for releases matching artist+album
+    try:
+        if not DISCOGS_TOKEN:
+            return None
+        # Use the discogs client (synchronous) to search in a thread.
+        # Note: discogs_client's search() takes filters as keyword arguments,
+        # not as a positional dict.
+        try:
+            await limiter.acquire()
+            if COVER_DEBUG_QUERIES:
+                console.print(f"[cyan][DEBUG] Discogs query: album='{album}' artist='{artist}'")
+            results = await asyncio.to_thread(discogs_client.search, album, artist=artist, type='release')
+        except Exception:
+            results = []
+        if COVER_DEBUG_QUERIES:
+            try:
+                dbg = []
+                for rr in (results or [])[:3]:
+                    try:
+                        data = getattr(rr, 'data', {}) or {}
+                        dbg.append({
+                            'id': data.get('id'),
+                            'title': data.get('title') or getattr(rr, 'title', None),
+                            'cover_image': data.get('cover_image')
+                        })
+                    except Exception:
+                        continue
+                console.print(f"[cyan][DEBUG] Discogs candidates: {dbg}[/cyan]")
+            except Exception:
+                pass
+        if not results:
+            # conservative normalized fallback: try a combined normalized string
+            try:
+                await limiter.acquire()
+                combined = f"{normalize_name(artist)} {normalize_name(album)}"
+                if COVER_DEBUG_QUERIES:
+                    console.print(f"[cyan][DEBUG] Discogs fallback query: {combined}")
+                results = await asyncio.to_thread(discogs_client.search, combined, type='release')
+            except Exception:
+                results = []
+        for r in results:
+            # r.data may contain 'cover_image' or an 'images' list
+            cover = None
+            try:
+                cover = r.data.get('cover_image')
+            except Exception:
+                cover = None
+            if not cover:
+                # try images list
+                imgs = r.data.get('images') or []
+                if imgs and isinstance(imgs, list) and imgs[0].get('uri'):
+                    cover = imgs[0].get('uri')
+            if cover:
+                # fetch image via aiohttp
+                try:
+                    await limiter.acquire()
+                    timeout = aiohttp.ClientTimeout(total=15)
+                    async with session.get(cover, timeout=timeout) as resp:
+                        if resp.status == 200:
+                            return await resp.read()
+                except Exception:
+                    continue
+        return None
+    except Exception as e:
+        console.print(f"[red]Discogs search exception: {e}[/red]")
+        return None
+
+# Load env once
+load_dotenv()
+
+# Console for pretty output
+console = Console()
+
+# If set to '1'|'true', run only Spotify searches (useful for quick testing)
+ONLY_SPOTIFY = os.getenv('ONLY_SPOTIFY', '').lower() in ('1', 'true', 'yes')
+# If set, print query strings and brief response info for debugging
+COVER_DEBUG_QUERIES = os.getenv('COVER_DEBUG_QUERIES', '').lower() in ('1', 'true', 'yes')
+# If set, use more aggressive fuzzy thresholds and extra fallbacks
+COVER_AGGRESSIVE = os.getenv('COVER_AGGRESSIVE', '').lower() in ('1', 'true', 'yes')
+
+
+def _log_attempt(artist, album, title, source, result):
+    """Log a single, clean attempt line to console and to `search_attempts.log`.
+
+    result should be a short string like 'Success', 'No match', 'Timeout', or an error message.
+ """ + try: + a = artist or "Unknown Artist" + al = album or "Unknown Album" + t = title or "Unknown Title" + line = f"SEARCH: {a} - {al} / {t} | Source: {source} | Result: {result}" + console.print(line) + try: + with open("search_attempts.log", "a", encoding="utf-8") as lf: + lf.write(line + "\n") + except Exception: + pass + except Exception: + # Never crash logging + pass + + +# Define a lightweight async rate limiter +class AsyncRateLimiter: + def __init__(self, rate_seconds: float): + self._rate = float(rate_seconds) + self._lock = asyncio.Lock() + self._last = 0.0 + + async def acquire(self) -> None: + async with self._lock: + now = asyncio.get_event_loop().time() + wait = self._rate - (now - self._last) + if wait > 0: + await asyncio.sleep(wait) + self._last = asyncio.get_event_loop().time() + +# Initialize MusicBrainz client +musicbrainzngs.set_useragent("cover-art-script", "1.0", "your-email@example.com") + +# Initialize Discogs client +DISCOGS_TOKEN = os.getenv("DISCOGS_TOKEN") +discogs_client = Client("cover-art-script/1.0", user_token=DISCOGS_TOKEN) + +# Define the log_api_response function at the top of the script +async def log_api_response(api_name, response): + """Log relevant parts of API responses for debugging purposes.""" + try: + data = await response.json() + if api_name == "MusicBrainz": + release_groups = data.get("release-groups", []) + relevant_info = [ + { + "id": rg.get("id"), + "title": rg.get("title"), + "artist": artist_credit_to_name(rg.get("artist-credit", [])) + } + for rg in release_groups + ] + console.print(f"[cyan][DEBUG] {api_name} relevant response: {relevant_info}[/cyan]") + elif api_name == "Discogs": + results = data.get("results", []) + relevant_info = [ + { + "id": result.get("id"), + "title": result.get("title"), + "cover_image": result.get("cover_image") + } + for result in results + ] + console.print(f"[cyan][DEBUG] {api_name} relevant response: {relevant_info}[/cyan]") + elif api_name == "iTunes": + results = data.get("results", []) + relevant_info = [ + { + "collectionId": result.get("collectionId"), + "collectionName": result.get("collectionName"), + "artworkUrl100": result.get("artworkUrl100") + } + for result in results + ] + console.print(f"[cyan][DEBUG] {api_name} relevant response: {relevant_info}[/cyan]") + else: + console.print(f"[cyan][DEBUG] {api_name} response: {data}[/cyan]") + except Exception as e: + console.print(f"[red][DEBUG] Failed to parse {api_name} response: {e}[/red]") # Helper to strip common parenthetical tags from album names def strip_album_tags(album): @@ -29,39 +284,375 @@ def strip_album_suffix(album): # Only if they appear at the end, case-insensitive, with or without punctuation suffix_pattern = r"[\s\-_:]*(ep|lp|single|album|remix|version|edit|mix|deluxe|expanded|anniversary|reissue|instrumental|karaoke|ost|score|session|mono|stereo|explicit|clean|bonus|disc ?\d+|cd ?\d+|vinyl|202[0-9]|20[0-1][0-9]|19[0-9][0-9])$" return re.sub(suffix_pattern, "", album, flags=re.IGNORECASE).strip() -# iTunes/Apple Music API fallback -def search_itunes_cover(artist, album): - """Search iTunes/Apple Music public API for album art.""" - import urllib.parse - base_url = "https://itunes.apple.com/search" - params = { - "term": f"{artist} {album}", - "entity": "album", - "limit": 1, - "media": "music" - } - url = f"{base_url}?{urllib.parse.urlencode(params)}" +# iTunes/Apple Music API fallback (async) +async def search_itunes_cover(session: aiohttp.ClientSession, artist, album, limiter: 'AsyncRateLimiter'): + # Use only the 
`itunespy` library for iTunes album lookups. + if not HAVE_ITUNESPY: + console.print(f"[yellow]iTunes: itunespy not available; skipping iTunes album search for '{artist} - {album}'[/yellow]") + return None + try: - resp = requests.get(url, timeout=10) - if resp.status_code != 200: + mod = cast(Any, itunespy) + + def _search(): + try: + # try common itunespy APIs safely + if hasattr(mod, 'search_album'): + return mod.search_album(f"{artist} {album}") + if hasattr(mod, 'Album') and hasattr(mod.Album, 'search'): + return mod.Album.search(f"{artist} {album}") + if hasattr(mod, 'search'): + return mod.search(f"{artist} {album}", entity='album') + return None + except Exception: + return None + + albums = await asyncio.to_thread(_search) + if COVER_DEBUG_QUERIES and albums: + try: + dbg = [] + for a in (albums or [])[:3]: + try: + aid = getattr(a, 'collectionId', None) or (a.get('collectionId') if isinstance(a, dict) else None) + except Exception: + aid = None + try: + aname = getattr(a, 'collectionName', None) or (a.get('collectionName') if isinstance(a, dict) else None) + except Exception: + aname = None + dbg.append({'id': aid, 'name': aname}) + console.print(f"[cyan][DEBUG] iTunes album candidates: {dbg}[/cyan]") + except Exception: + pass + if not albums: + if COVER_DEBUG_QUERIES: + console.print(f"[cyan][DEBUG] iTunes album: no results for '{artist} - {album}', trying normalized fallback") + norm_q = f"{normalize_name(artist)} {normalize_name(album)}" + def _search_norm(): + try: + if hasattr(mod, 'search_album'): + return mod.search_album(norm_q) + if hasattr(mod, 'Album') and hasattr(mod.Album, 'search'): + return mod.Album.search(norm_q) + if hasattr(mod, 'search'): + return mod.search(norm_q, entity='album') + return None + except Exception: + return None + + albums = await asyncio.to_thread(_search_norm) + if not albums: + return None + + first = albums[0] + art_url = getattr(first, 'artwork_url', None) or getattr(first, 'artworkUrl100', None) + if not art_url: return None - data = resp.json() - if data.get("resultCount", 0) == 0: + + # Normalize to higher-res if possible + if '100x100' in art_url: + art_url = art_url.replace('100x100bb', '600x600bb') + + await limiter.acquire() + img_timeout = aiohttp.ClientTimeout(total=15) + try: + async with session.get(art_url, timeout=img_timeout) as img_resp: + if img_resp.status == 200: + return await img_resp.read() + except Exception: return None - result = data["results"][0] - # Use the highest-res artwork available - art_url = result.get("artworkUrl100") - if art_url: - art_url = art_url.replace("100x100bb", "600x600bb") - img_resp = requests.get(art_url) - if img_resp.status_code == 200: - return img_resp.content - except Exception: - traceback.format_exc() - pass + except Exception as e: + console.print(f"[red][ERROR] itunespy album search exception: {e}[/red]") return None +async def search_itunes_track(session: aiohttp.ClientSession, artist, title, limiter: 'AsyncRateLimiter'): + # Use only the `itunespy` library for iTunes track lookups. 
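+    # Illustrative note: itunespy's exact surface varies by version, which is why
+    # the lookup below probes search_track()/Track.search()/search() defensively
+    # rather than assuming one signature. A typical call, if search_track exists:
+    #   tracks = itunespy.search_track(f"{artist} {title}")
+    #   art = getattr(tracks[0], 'artwork_url', None) if tracks else None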
+ if not HAVE_ITUNESPY: + console.print(f"[yellow]iTunes: itunespy not available; skipping iTunes track search for '{artist} - {title}'[/yellow]") + return None + + try: + mod = cast(Any, itunespy) + + def _search(): + try: + if hasattr(mod, 'search_track'): + return mod.search_track(f"{artist} {title}") + if hasattr(mod, 'Track') and hasattr(mod.Track, 'search'): + return mod.Track.search(f"{artist} {title}") + if hasattr(mod, 'search'): + return mod.search(f"{artist} {title}", entity='song') + return None + except Exception: + return None + + tracks = await asyncio.to_thread(_search) + if not tracks: + if COVER_DEBUG_QUERIES: + console.print(f"[cyan][DEBUG] iTunes track: no results for '{artist} - {title}', trying normalized fallback") + norm_q = f"{normalize_name(artist)} {normalize_name(title)}" + def _search_norm_track(): + try: + if hasattr(mod, 'search_track'): + return mod.search_track(norm_q) + if hasattr(mod, 'Track') and hasattr(mod.Track, 'search'): + return mod.Track.search(norm_q) + if hasattr(mod, 'search'): + return mod.search(norm_q, entity='song') + return None + except Exception: + return None + + tracks = await asyncio.to_thread(_search_norm_track) + if not tracks: + return None + + first = tracks[0] + art_url = getattr(first, 'artwork_url', None) or getattr(first, 'artworkUrl100', None) + if not art_url: + return None + if '100x100' in art_url: + art_url = art_url.replace('100x100bb', '600x600bb') + + await limiter.acquire() + img_timeout = aiohttp.ClientTimeout(total=15) + try: + async with session.get(art_url, timeout=img_timeout) as img_resp: + if img_resp.status == 200: + return await img_resp.read() + except Exception: + return None + except Exception as e: + console.print(f"[red][ERROR] itunespy track search exception: {e}[/red]") + return None + + +async def search_deezer_cover(session: aiohttp.ClientSession, artist, album, limiter: 'AsyncRateLimiter'): + """Search Deezer for an album cover. 
Uses Deezer public API (no auth).""" + try: + # build simple query + from urllib.parse import quote + query = f"{artist} {album}" + if COVER_DEBUG_QUERIES: + console.print(f"[cyan][DEBUG] Deezer query: {query}") + url = f"https://api.deezer.com/search/album?q={quote(query)}&limit=1" + await limiter.acquire() + timeout = aiohttp.ClientTimeout(total=10) + async with session.get(url, timeout=timeout) as resp: + if resp.status != 200: + return None + data = await resp.json() + items = data.get('data') or [] + if COVER_DEBUG_QUERIES: + try: + dbg = [] + for it in (items or [])[:3]: + dbg.append({ + 'id': it.get('id'), + 'title': it.get('title'), + 'cover_xl': it.get('cover_xl'), + 'cover_big': it.get('cover_big') + }) + console.print(f"[cyan][DEBUG] Deezer candidates: {dbg}[/cyan]") + except Exception: + pass + if not items: + # try a conservative normalized fallback + norm_q = f"{normalize_name(artist)} {normalize_name(album)}" + if COVER_DEBUG_QUERIES: + console.print(f"[cyan][DEBUG] Deezer fallback query: {norm_q}") + url2 = f"https://api.deezer.com/search/album?q={quote(norm_q)}&limit=1" + async with session.get(url2, timeout=timeout) as resp2: + if resp2.status != 200: + return None + data2 = await resp2.json() + items = data2.get('data') or [] + if not items: + return None + first = items[0] + # prefer XL or big covers + art_url = first.get('cover_xl') or first.get('cover_big') or first.get('cover') + if not art_url: + return None + await limiter.acquire() + img_timeout = aiohttp.ClientTimeout(total=15) + async with session.get(art_url, timeout=img_timeout) as img_resp: + if img_resp.status == 200: + return await img_resp.read() + except Exception: + return None + return None + + +async def search_lastfm_cover(session: aiohttp.ClientSession, artist, album, limiter: 'AsyncRateLimiter'): + """Search Last.fm for album cover using album.getInfo. Requires LASTFM_API_KEY in env.""" + LASTFM_API_KEY = os.getenv('LASTFM_API_KEY') + if not LASTFM_API_KEY: + console.print(f"[yellow]LastFM: LASTFM_API_KEY not configured; skipping LastFM search for '{artist} - {album}'[/yellow]") + return None + try: + params = { + 'method': 'album.getinfo', + 'api_key': LASTFM_API_KEY, + 'artist': artist, + 'album': album, + 'format': 'json', + } + from urllib.parse import quote + qs = '&'.join(f"{quote(str(k))}={quote(str(v))}" for k, v in params.items()) + url = f"http://ws.audioscrobbler.com/2.0/?{qs}" + await limiter.acquire() + timeout = aiohttp.ClientTimeout(total=10) + async with session.get(url, timeout=timeout) as resp: + if resp.status != 200: + return None + data = await resp.json() + album_data = data.get('album') or {} + images = album_data.get('image') or [] + # images is a list of dicts with '#text' and 'size' + art_url = None + # prefer 'extralarge' or 'mega' + for size_name in ('mega', 'extralarge', 'large', 'medium'): + for img in images: + if img.get('size') == size_name and img.get('#text'): + art_url = img.get('#text') + break + if art_url: + break + if not art_url: + return None + await limiter.acquire() + img_timeout = aiohttp.ClientTimeout(total=15) + async with session.get(art_url, timeout=img_timeout) as img_resp: + if img_resp.status == 200: + return await img_resp.read() + except Exception: + return None + return None + + +_SPOTIFY_CLIENT = None + + +def get_spotify_client(): + """Lazily create and cache a spotipy.Spotify client using client-credentials. + + Returns None if spotipy is not installed or credentials are not configured. 
+ """ + global _SPOTIFY_CLIENT + if _SPOTIFY_CLIENT is not None: + return _SPOTIFY_CLIENT + if not HAVE_SPOTIPY: + return None + client_id = os.getenv('SPOTIFY_CLIENT_ID') + client_secret = os.getenv('SPOTIFY_CLIENT_SECRET') + if not client_id or not client_secret: + return None + try: + import importlib + sp_mod = importlib.import_module('spotipy') + creds_mod = importlib.import_module('spotipy.oauth2') + SpotifyClientCredentials = getattr(creds_mod, 'SpotifyClientCredentials', None) + SpotifyCls = getattr(sp_mod, 'Spotify', None) + if SpotifyClientCredentials is None or SpotifyCls is None: + return None + creds = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret) + _SPOTIFY_CLIENT = SpotifyCls(client_credentials_manager=creds) + return _SPOTIFY_CLIENT + except Exception: + return None + + +async def search_spotify_cover(session: aiohttp.ClientSession, artist, album, limiter: 'AsyncRateLimiter', isrc: Optional[str] = None): + """Search Spotify for album cover with multiple strategies: + - If `isrc` provided, try track search by ISRC first. + - Try quoted album+artist queries, then looser queries. + - Use fuzzy matching to validate results. + - Pick the largest available image. + """ + client = get_spotify_client() + if client is None: + console.print(f"[yellow]Spotify: client not configured or spotipy not installed; skipping search for '{artist} - {album}'[/yellow]") + return None + + def _sp_search(q, typ='album', limit=3): + try: + return client.search(q=q, type=typ, limit=limit) + except Exception: + return None + + try: + # 1) ISRC search (track -> album) + if isrc: + res = await asyncio.to_thread(_sp_search, f'isrc:{isrc}', 'track', 1) + if res: + tracks = res.get('tracks', {}).get('items', []) + if tracks: + album_obj = tracks[0].get('album') or {} + images = album_obj.get('images') or [] + if images: + # pick largest + best = max(images, key=lambda x: x.get('width') or 0) + art_url = best.get('url') + if art_url: + await limiter.acquire() + async with session.get(art_url, timeout=aiohttp.ClientTimeout(total=15)) as img_resp: + if img_resp.status == 200: + return await img_resp.read() + + # Prepare normalized variants for querying + quoted_q = f'album:"{album}" artist:"{artist}"' + exact_q = f'artist:{artist} album:{album}' + norm_artist = normalize_name(artist) + norm_album = normalize_name(album) + simple_q = f'album:{norm_album} artist:{norm_artist}' + queries = [quoted_q, exact_q, simple_q, f'album:"{album}"', f'artist:"{artist}"'] + + for q in queries: + res = await asyncio.to_thread(_sp_search, q, 'album', 3) + if not res: + continue + albums = res.get('albums', {}).get('items', []) + if COVER_DEBUG_QUERIES: + try: + dbg = [] + for a in (albums or [])[:3]: + dbg.append({ + 'id': a.get('id'), + 'name': a.get('name'), + 'artists': [ar.get('name') for ar in (a.get('artists') or [])[:3] if ar.get('name')], + 'images': [img.get('url') for img in (a.get('images') or [])[:3]] + }) + console.print(f"[cyan][DEBUG] Spotify album candidates for query '{q}': {dbg}[/cyan]") + except Exception: + pass + if not albums: + continue + # examine candidates and pick the best match via fuzzy matching + for a in albums: + found_album = a.get('name') or '' + found_artist = ' '.join([ar.get('name') for ar in (a.get('artists') or []) if ar.get('name')]) + if is_fuzzy_match(artist, found_artist, threshold=75) and (not album or is_fuzzy_match(album, found_album, threshold=70)): + images = a.get('images') or [] + if not images: + continue + best = max(images, key=lambda x: 
x.get('width') or 0) + art_url = best.get('url') + if art_url: + await limiter.acquire() + try: + async with session.get(art_url, timeout=aiohttp.ClientTimeout(total=15)) as img_resp: + if img_resp.status == 200: + return await img_resp.read() + except Exception: + continue + + return None + except Exception: + return None + + # Fuzzy match helper for metadata def is_fuzzy_match(expected, actual, threshold=80): if not expected or not actual: @@ -84,6 +675,23 @@ def normalize_name(name): name = re.sub(r"[\(\)\[\]\{\}\'\"\!\?\.,:;`~@#$%^&*+=|\\/<>]", "", name) return name + +def artist_credit_to_name(ac): + """Safely convert a MusicBrainz artist-credit array into a single artist name string.""" + parts = [] + for a in ac: + if isinstance(a, dict): + # Common formats: {'name': 'Artist Name'} or {'artist': {'name': 'Artist Name'}} + name = None + if a.get('name'): + name = a.get('name') + elif a.get('artist') and isinstance(a.get('artist'), dict) and a.get('artist', {}).get('name'): + name = a.get('artist', {}).get('name') + if name: + parts.append(name) + return " ".join(parts) + + # Suppress noisy loggers (aiohttp, urllib3, etc.) for noisy_logger in [ "aiohttp.client", @@ -101,75 +709,23 @@ for noisy_logger in [ logging.getLogger().setLevel(logging.CRITICAL) -# Directory to scan -MUSIC_DIR = "/storage/music2/completed/FLAC/review" -REPORT_CSV = "cover_art_report.csv" -AUDIO_EXTS = {".flac", ".mp3", ".m4a"} - -console = Console() - -# MusicBrainz API helpers - -# Limit concurrent MusicBrainz requests -MUSICBRAINZ_SEMAPHORE = asyncio.Semaphore(1) - -def search_musicbrainz_cover(artist, album, max_retries=4): - url = f"https://musicbrainz.org/ws/2/release-group/?query=artist:{artist} AND release:{album}&fmt=json" - headers = {"User-Agent": "cover-art-script/1.0"} - delay = 1.5 - for attempt in range(1, max_retries + 1): - # Limit concurrency - loop = asyncio.get_event_loop() - if MUSICBRAINZ_SEMAPHORE.locked(): - loop.run_until_complete(MUSICBRAINZ_SEMAPHORE.acquire()) - else: - MUSICBRAINZ_SEMAPHORE.acquire() - try: - resp = requests.get(url, headers=headers) - if resp.status_code == 503: - console.print(f"[yellow]MusicBrainz 503 error, retrying (attempt {attempt})...[/yellow]") - time.sleep(delay + random.uniform(0, 0.5)) - delay *= 2 - continue - if resp.status_code != 200: - console.print(f"[red]MusicBrainz API error: {resp.status_code}[/red]") - return None - try: - data = resp.json() - except Exception as e: - console.print(f"[red]MusicBrainz API returned invalid JSON for {artist} - {album}: {e}[/red]") - return None - if not data.get("release-groups"): - console.print(f"[red]No release-groups found for {artist} - {album}[/red]") - return None - rgid = data["release-groups"][0]["id"] - caa_url = f"https://coverartarchive.org/release-group/{rgid}/front-500" - caa_resp = requests.get(caa_url) - if caa_resp.status_code == 200: - console.print(f"[green]Found cover art on Cover Art Archive for {artist} - {album}[/green]") - return caa_resp.content - console.print(f"[red]No cover art found on Cover Art Archive for {artist} - {album}[/red]") - return None - finally: - try: - MUSICBRAINZ_SEMAPHORE.release() - except Exception: - pass - console.print(f"[red]MusicBrainz API failed after {max_retries} attempts for {artist} - {album}[/red]") - return None - -async def fetch_srutil_cover(sr, artist, song): +async def fetch_srutil_cover(sr, artist, song, session: aiohttp.ClientSession, limiter: AsyncRateLimiter): try: album = await sr.get_album_by_name(artist, song) if not album or not album.get('id'): 
            return None
        cover_url = await sr.get_cover_by_album_id(album['id'], 640)
        if cover_url:
-            resp = requests.get(cover_url)
-            if resp.status_code == 200:
-                return resp.content
-            else:
-                console.print(f"[red]SRUtil: Failed to fetch cover art from URL (status {resp.status_code}): {cover_url}[/red]")
+            await limiter.acquire()
+            try:
+                timeout = aiohttp.ClientTimeout(total=15)
+                async with session.get(cover_url, timeout=timeout) as resp:
+                    if resp.status == 200:
+                        return await resp.read()
+                    else:
+                        console.print(f"[red]SRUtil: Failed to fetch cover art from URL (status {resp.status}): {cover_url}[/red]")
+            except Exception as e:
+                console.print(f"[red]SRUtil: Exception fetching cover url: {e}[/red]")
    except Exception as e:
        msg = str(e)
        if "Cannot combine AUTHORIZATION header with AUTH argument" in msg:
@@ -178,164 +734,354 @@ async def fetch_srutil_cover(sr, artist, song):
            console.print(f"[red]SRUtil: Exception: {e}[/red]")
    return None
-def has_cover(file):
+
+async def get_isrc(file):
+    try:
+        def _read_isrc():
+            f = load_file(file)
+            # music_tag may store ISRC under 'isrc' or 'ISRC'
+            try:
+                val = f['isrc'].value
+            except Exception:
+                try:
+                    val = f['ISRC'].value
+                except Exception:
+                    val = None
+            if isinstance(val, list):
+                return val[0] if val else None
+            return val
+        return await asyncio.to_thread(_read_isrc)
+    except Exception as e:
+        console.print(f"[red]Error reading ISRC for {file}: {e}[/red]")
+        return None
+
+
+async def search_musicbrainz_by_isrc(session, isrc, limiter: AsyncRateLimiter):
+    if not isrc:
+        return None
+    headers = {"User-Agent": "cover-art-script/1.0"}
+    # Use the ISRC lookup endpoint, which returns recordings; `inc=releases` is
+    # required so each recording carries its releases for the CAA lookup below.
+    url = f"https://musicbrainz.org/ws/2/isrc/{isrc}?fmt=json&inc=releases"
+    try:
+        await limiter.acquire()
+        timeout = aiohttp.ClientTimeout(total=15)
+        async with session.get(url, headers=headers, timeout=timeout) as resp:
+            if resp.status != 200:
+                return None
+            try:
+                data = await resp.json()
+            except Exception:
+                return None
+            recordings = data.get('recordings') or []
+            for rec in recordings:
+                # try releases tied to this recording
+                releases = rec.get('releases') or []
+                if releases:
+                    relid = releases[0].get('id')
+                    if relid:
+                        caa_url = f"https://coverartarchive.org/release/{relid}/front-500"
+                        async with session.get(caa_url, timeout=timeout) as caa_resp:
+                            if caa_resp.status == 200:
+                                console.print(f"[green]Found cover art via ISRC {isrc}[/green]")
+                                return await caa_resp.read()
+            return None
+    except Exception as e:
+        console.print(f"[red]MusicBrainz ISRC lookup exception for {isrc}: {e}[/red]")
+        return None
+
+
+# Concurrency limit for async processing
+CONCURRENCY = 18
+
+# Helper for formatting failure reasons in a consistent way
+def format_failure_reason(e, resp_status=None):
+    """Format a failure reason from an exception or response status"""
+    if isinstance(e, asyncio.TimeoutError):
+        return "timeout"
+    elif isinstance(e, aiohttp.ClientError):
+        return f"network error: {str(e)}"
+    elif resp_status:
+        return f"HTTP {resp_status}"
+    elif e:
+        return str(e)
+    return "no match found"
+
+async def process_file(file, sr, table, results, sem, session: aiohttp.ClientSession, limiters: dict):
+    """Process a single audio file to find and embed cover art."""
+    async with sem:
+        if await has_cover(file):
+            table.add_row(file, "Already Present", "-")
+            results.append([file, "Already Present", "-"])
+            return
+
+        artist, album, title = await get_artist_album_title(file)
+        album_key = (artist, album)
+        image_bytes = ALBUM_ART_CACHE.get(album_key)
+        source = None
+        status = 
"Failed" + + # Try ISRC-based lookup first + isrc = await get_isrc(file) + if isrc: + img = await search_musicbrainz_by_isrc(session, isrc, limiters['musicbrainz']) + if img: + image_bytes = img + source = f"MusicBrainz (ISRC:{isrc})" + status = "Success" + ALBUM_ART_CACHE[album_key] = image_bytes + _log_attempt(artist, album, title, source, "Success") + else: + _log_attempt(artist, album, title, f"MusicBrainz (ISRC:{isrc})", "No match") + + # If ONLY_SPOTIFY testing mode is enabled, attempt only Spotify and return + if ONLY_SPOTIFY: + img = await search_spotify_cover(session, artist, album, limiters['spotify'], isrc) + if img: + image_bytes = img + source = "Spotify" + status = "Success" + ALBUM_ART_CACHE[album_key] = image_bytes + _log_attempt(artist, album, title, "Spotify", "Success") + file_basename = os.path.basename(file) + ok = await embed_cover(file, image_bytes) + if ok: + console.print(f"[green][FINAL RESULT] {file_basename} — {artist} / {album} | Success via {source}[/green]") + else: + status = "Embed Failed" + console.print(f"[red][FINAL RESULT] {file_basename} — {artist} / {album} | Embed Failed from {source}[/red]") + else: + _log_attempt(artist, album, title, "Spotify", "No match") + console.print(f"[yellow][FINAL RESULT] {os.path.basename(file)} — {artist} / {album} | No Spotify cover art found[/yellow]") + + table.add_row(file, status, source if source else "-") + results.append([file, status, source if source else "-"]) + return + + # SRUtil + if not image_bytes: + img = await fetch_srutil_cover(sr, artist, album, session, limiters['srutil']) + if img: + image_bytes = img + source = "SRUtil" + status = "Success" + ALBUM_ART_CACHE[album_key] = image_bytes + _log_attempt(artist, album, title, "SRUtil", "Success") + else: + _log_attempt(artist, album, title, "SRUtil", "No match") + + # MusicBrainz + if not image_bytes: + img = await search_musicbrainz_cover(artist, album, session, limiters['musicbrainz']) + if img: + image_bytes = img + source = "MusicBrainz" + status = "Success" + ALBUM_ART_CACHE[album_key] = image_bytes + _log_attempt(artist, album, title, "MusicBrainz", "Success") + else: + _log_attempt(artist, album, title, "MusicBrainz", "No match") + + # Discogs + if not image_bytes: + img = await search_discogs_cover(artist, album, session, limiters['discogs']) + if img: + image_bytes = img + source = "Discogs" + status = "Success" + ALBUM_ART_CACHE[album_key] = image_bytes + _log_attempt(artist, album, title, "Discogs", "Success") + else: + _log_attempt(artist, album, title, "Discogs", "No match") + + # Deezer + if not image_bytes: + img = await search_deezer_cover(session, artist, album, limiters['deezer']) + if img: + image_bytes = img + source = "Deezer" + status = "Success" + ALBUM_ART_CACHE[album_key] = image_bytes + _log_attempt(artist, album, title, "Deezer", "Success") + else: + _log_attempt(artist, album, title, "Deezer", "No match") + + # Spotify + if not image_bytes: + img = await search_spotify_cover(session, artist, album, limiters['spotify'], isrc) + if img: + image_bytes = img + source = "Spotify" + status = "Success" + ALBUM_ART_CACHE[album_key] = image_bytes + _log_attempt(artist, album, title, "Spotify", "Success") + else: + _log_attempt(artist, album, title, "Spotify", "No match") + + # iTunes album + if not image_bytes: + img = await search_itunes_cover(session, artist, album, limiters['itunes']) + if img: + image_bytes = img + source = "iTunes(album)" + status = "Success" + ALBUM_ART_CACHE[album_key] = image_bytes + _log_attempt(artist, album, 
title, "iTunes(album)", "Success") + else: + _log_attempt(artist, album, title, "iTunes(album)", "No match") + + # iTunes track + if not image_bytes: + img = await search_itunes_track(session, artist, title, limiters['itunes']) + if img: + image_bytes = img + source = "iTunes(track)" + status = "Success" + ALBUM_ART_CACHE[album_key] = image_bytes + _log_attempt(artist, album, title, "iTunes(track)", "Success") + else: + _log_attempt(artist, album, title, "iTunes(track)", "No match") + + # Last.fm + if not image_bytes: + img = await search_lastfm_cover(session, artist, album, limiters['lastfm']) + if img: + image_bytes = img + source = "LastFM" + status = "Success" + ALBUM_ART_CACHE[album_key] = image_bytes + _log_attempt(artist, album, title, "LastFM", "Success") + else: + _log_attempt(artist, album, title, "LastFM", "No match") + + # Embed and summary + file_basename = os.path.basename(file) + if image_bytes and source: + ok = await embed_cover(file, image_bytes) + if ok: + console.print(f"[green][FINAL RESULT] {file_basename} — {artist} / {album} | Success via {source}[/green]") + else: + status = "Embed Failed" + console.print(f"[red][FINAL RESULT] {file_basename} — {artist} / {album} | Embed Failed from {source}[/red]") + else: + console.print(f"[yellow][FINAL RESULT] {file_basename} — {artist} / {album} | No cover art found[/yellow]") + + table.add_row(file, status, source if source else "-") + results.append([file, status, source if source else "-"]) + +async def has_cover(file): + # Check if the audio file already has embedded cover art try: f = load_file(file) - has = bool(f['artwork'].first) - return has - except Exception as e: - console.print(f"[red]Error checking cover art for {file}: {e}[/red]") + # music_tag stores artwork in 'artwork' which may be a list-like field + art = f['artwork'] + # If there is any artwork, consider it present + try: + return bool(art.first) + except Exception: + # fallback if .first not available + return bool(art) + except Exception: return False -def embed_cover(file, image_bytes): +async def get_artist_album_title(file): + # Extract artist, album, and title from audio file tags + try: + f = load_file(file) + artist = str(f['artist'].first) if f['artist'].first else "Unknown Artist" + album = str(f['album'].first) if f['album'].first else "Unknown Album" + title = str(f['title'].first) if f['title'].first else "Unknown Title" + return artist, album, title + except Exception: + return "Unknown Artist", "Unknown Album", "Unknown Title" + +async def embed_cover(file, image_bytes): + # Embed cover art into audio file metadata using music_tag try: f = load_file(file) f['artwork'] = image_bytes f.save() return True except Exception as e: - console.print(f"[red]Failed to embed cover art into {file}: {e}[/red]") + console.print(f"[red][ERROR] Failed to embed cover: {e}[/red]") return False -def get_artist_album_title(file): - try: - f = load_file(file) - artist = f['artist'].value or "" - album = f['album'].value or "" - title = f['title'].value or os.path.splitext(os.path.basename(file))[0] - return artist, album, title - except Exception as e: - console.print(f"[red]Error reading tags for {file}: {e}[/red]") - return "", "", os.path.splitext(os.path.basename(file))[0] - - -# Concurrency limit for async processing -CONCURRENCY = 12 - -async def process_file(file, sr, table, results, sem): - async with sem: - if has_cover(file): - table.add_row(file, "Already Present", "-") - results.append([file, "Already Present", "-"]) - return - artist, album, title = 
get_artist_album_title(file) - # Use a global or passed-in cache dict for album art - if not hasattr(process_file, "album_art_cache"): - process_file.album_art_cache = {} - album_key = (artist, album) - image_bytes = process_file.album_art_cache.get(album_key) - source = "SRUtil" - if image_bytes is None: - image_bytes = await fetch_srutil_cover(sr, artist, album) - if image_bytes: - process_file.album_art_cache[album_key] = image_bytes - if not image_bytes: - image_bytes = search_musicbrainz_cover(normalize_name(artist), normalize_name(album)) - source = "MusicBrainz" - if image_bytes: - process_file.album_art_cache[album_key] = image_bytes - if not image_bytes: - image_bytes = search_itunes_cover(artist, album) - source = "iTunes" - if image_bytes: - process_file.album_art_cache[album_key] = image_bytes - # If all lookups failed, try with parenthetical tag stripped - if not image_bytes and re.search(r"\([^)]*\)$", album): - cleaned_album = strip_album_tags(album) - if cleaned_album and cleaned_album != album: - cleaned_key = (artist, cleaned_album) - image_bytes = process_file.album_art_cache.get(cleaned_key) - if image_bytes is None: - image_bytes = await fetch_srutil_cover(sr, artist, cleaned_album) - if image_bytes: - process_file.album_art_cache[cleaned_key] = image_bytes - if not image_bytes: - image_bytes = search_musicbrainz_cover(normalize_name(artist), normalize_name(cleaned_album)) - source = "MusicBrainz (stripped)" - if image_bytes: - process_file.album_art_cache[cleaned_key] = image_bytes - if not image_bytes: - image_bytes = search_itunes_cover(artist, cleaned_album) - source = "iTunes (stripped)" - if image_bytes: - process_file.album_art_cache[cleaned_key] = image_bytes - - # If still not found, try with common suffixes (EP, LP, etc.) 
stripped from album name - if not image_bytes: - suffix_stripped_album = strip_album_suffix(album) - if suffix_stripped_album and suffix_stripped_album != album: - suffix_key = (artist, suffix_stripped_album) - image_bytes = process_file.album_art_cache.get(suffix_key) - if image_bytes is None: - image_bytes = await fetch_srutil_cover(sr, artist, suffix_stripped_album) - if image_bytes: - process_file.album_art_cache[suffix_key] = image_bytes - if not image_bytes: - image_bytes = search_musicbrainz_cover(normalize_name(artist), normalize_name(suffix_stripped_album)) - source = "MusicBrainz (suffix-stripped)" - if image_bytes: - process_file.album_art_cache[suffix_key] = image_bytes - if not image_bytes: - image_bytes = search_itunes_cover(artist, suffix_stripped_album) - source = "iTunes (suffix-stripped)" - if image_bytes: - process_file.album_art_cache[suffix_key] = image_bytes - if isinstance(image_bytes, bytes): - ok = embed_cover(file, image_bytes) - status = "Embedded" if ok else "Failed to Embed" - if ok: - console.print(f"[green]Embedded cover art from {source}:[/green] {file}") - else: - console.print(f"[red]Failed to embed cover art ({source}):[/red] {file}") - elif image_bytes: - status = "Failed to Embed (not bytes)" - console.print(f"[red]Failed to embed cover art (not bytes) ({source}):[/red] {file}") - else: - status = "Not Found" - source = "-" - console.print(f"[red]No cover art found:[/red] {file}") - table.add_row(file, status, source) - results.append([file, status, source]) - async def main(): - console.print(f"[bold blue]Scanning directory: {MUSIC_DIR}[/bold blue]") - sr = SRUtil() - results = [] - files = [] - for root, _, filenames in os.walk(MUSIC_DIR): - for fn in filenames: - if os.path.splitext(fn)[1].lower() in AUDIO_EXTS: - file_path = os.path.join(root, fn) - files.append(file_path) + try: + console.print(f"[bold blue]Scanning directory: {MUSIC_DIR}[/bold blue]") + sr = SRUtil() + results = [] + files = [] + for root, _, filenames in os.walk(MUSIC_DIR): + for fn in filenames: + if os.path.splitext(fn)[1].lower() in AUDIO_EXTS: + file_path = os.path.join(root, fn) + files.append(file_path) - table = Table(title="Cover Art Embedding Report") - table.add_column("File", style="cyan", overflow="fold") - table.add_column("Status", style="green") - table.add_column("Source", style="magenta") + table = Table(title="Cover Art Embedding Report") + table.add_column("File", style="cyan", overflow="fold") + table.add_column("Status", style="green") + table.add_column("Source", style="magenta") - sem = asyncio.Semaphore(CONCURRENCY) - async def worker(file, sr, table, results, sem, progress, task_id): - await process_file(file, sr, table, results, sem) - progress.update(task_id, advance=1) + # create rate limiters (seconds between requests) + RATE_SRUTIL = 0.1 + RATE_MUSICBRAINZ = 1.0 + RATE_ITUNES = 0.5 + RATE_DISCOGS = 1.0 + RATE_DEEZER = 0.5 + RATE_LASTFM = 1.0 + RATE_SPOTIFY = 0.5 - with Progress( - TextColumn("[progress.description]{task.description}"), - BarColumn(), - TaskProgressColumn(), - TimeElapsedColumn(), - ) as progress: - task_id = progress.add_task("Processing files...", total=len(files)) - # Schedule all workers - await asyncio.gather(*(worker(file, sr, table, results, sem, progress, task_id) for file in files)) + limiters = { + 'srutil': AsyncRateLimiter(RATE_SRUTIL), + 'musicbrainz': AsyncRateLimiter(RATE_MUSICBRAINZ), + 'itunes': AsyncRateLimiter(RATE_ITUNES), + 'discogs': AsyncRateLimiter(RATE_DISCOGS), + 'deezer': AsyncRateLimiter(RATE_DEEZER), + 
'lastfm': AsyncRateLimiter(RATE_LASTFM),
+            'spotify': AsyncRateLimiter(RATE_SPOTIFY),
+        }
+
+        sem = asyncio.Semaphore(CONCURRENCY)
+
+        async def worker(file, sr, table, results, sem, progress, task_id, session, limiters):
+            await process_file(file, sr, table, results, sem, session, limiters)
+            progress.update(task_id, advance=1)
+
+        async with aiohttp.ClientSession() as session:
+            with Progress(
+                TextColumn("[progress.description]{task.description}"),
+                BarColumn(),
+                TaskProgressColumn(),
+                TimeElapsedColumn(),
+            ) as progress:
+                task_id = progress.add_task("Processing files...", total=len(files))
+                # Schedule all workers
+                await asyncio.gather(*(worker(file, sr, table, results, sem, progress, task_id, session, limiters) for file in files))
+
+        # Print summary table and CSV after progress bar
+        console.print(table)
+        with open(REPORT_CSV, "w", newline="") as f:
+            writer = csv.writer(f)
+            writer.writerow(["File", "Status", "Source"])
+            writer.writerows(results)
+        console.print(f"[bold green]CSV report written to {REPORT_CSV}[/bold green]")
+    except Exception as e:
+        console.print(f"[red][ERROR] Unhandled exception: {e}[/red]")
+        traceback.print_exc()
+        sys.exit(1)
 
 if __name__ == "__main__":
     asyncio.run(main())
\ No newline at end of file
diff --git a/test/test_search_track.py b/test/test_search_track.py
index 516f60b..6d61380 100644
--- a/test/test_search_track.py
+++ b/test/test_search_track.py
@@ -10,13 +10,16 @@ logger.setLevel(logging.CRITICAL)
 
 async def main():
     sr = SRUtil()
-    artist, album = "Kadavar - The Sacrament Of Sin".split(" - ")
-    search_res = await sr.get_album_by_name(artist[:8], album)
-    logging.critical("Search result: %s", search_res)
-    album = search_res
-    _cover = await sr.get_cover_by_album_id(album.get('id'), 640)
-    # cover = sr._get_tidal_cover_url(album.get('cover'), 640)
-    logging.critical("Result: %s, Cover: %s", album, _cover)
+    artist_search = await sr.get_artists_by_name("Ren")
+    # logging.critical("Artist search: %s", artist_search)
+    res = [dict(x) for x in artist_search if x.get('popularity', 0) and (x.get('artist') or '').lower() == 'ren']
+    logging.critical("Results: %s", res)
+    # search_res = await sr.get_album_by_name(artist[:8], album)
+    # logging.critical("Search result: %s", search_res)
+    # album = search_res
+    # _cover = await sr.get_cover_by_album_id(album.get('id'), 640)
+    # # cover = sr._get_tidal_cover_url(album.get('cover'), 640)
+    # logging.critical("Result: %s, Cover: %s", album, _cover)
     return
diff --git a/util.py b/util.py
index 868b15a..5620df4 100644
--- a/util.py
+++ b/util.py
@@ -40,7 +40,7 @@ class Utilities:
 
         _key: str = key.split("Bearer ", maxsplit=1)[1].strip()
 
-        if not _key in self.constants.API_KEYS:
+        if _key not in self.constants.API_KEYS:
             return False
 
         if req_type == 2:
diff --git a/utils/radio_util.py b/utils/radio_util.py
index 4b672c3..c810396 100644
--- a/utils/radio_util.py
+++ 
b/utils/radio_util.py @@ -35,6 +35,7 @@ class RadioUtil: self.gpt = gpt.GPT(self.constants) self.ls_uri: str = self.constants.LS_URI self.redis_client = redis.Redis(password=private.REDIS_PW) + self.DEDUPE_PLAYLISTS: bool = True self.sqlite_exts: list[str] = [ "/home/kyle/api/solibs/spellfix1.cpython-311-x86_64-linux-gnu.so" ] @@ -392,41 +393,69 @@ class RadioUtil: for playlist in self.playlists: playlist_redis_key: str = f"playlist:{playlist}" _playlist = await self.redis_client.json().get(playlist_redis_key) # type: ignore + # Ensure we always have a list to work with + if not _playlist: + logging.warning("No playlist found in redis for %s, skipping", playlist) + self.active_playlist[playlist] = [] + continue + + # Make sure playlist key exists if playlist not in self.active_playlist.keys(): self.active_playlist[playlist] = [] - random.shuffle(_playlist) - self.active_playlist[playlist] = [ - { - "uuid": str(uuid().hex), - "id": r["id"], - "artist": double_space.sub(" ", r["artist"]).strip(), - "song": double_space.sub(" ", r["song"]).strip(), - "album": double_space.sub(" ", r["album"]).strip(), - "genre": r["genre"] if r["genre"] else "Not Found", - "artistsong": double_space.sub( - " ", r["artistdashsong"] - ).strip(), - "file_path": r["file_path"], - "duration": r["duration"], - } for r in _playlist - if r not in self.active_playlist[playlist] - ] + + # Shuffle a copy so we don't mutate the underlying redis object + try: + shuffled = list(_playlist) + random.shuffle(shuffled) + except Exception: + shuffled = _playlist + + # Build a fresh list rather than modifying in-place (prevents duplication) + built: list[dict] = [] + for r in shuffled: + try: + item = { + "uuid": str(uuid().hex), + "id": r.get("id"), + "artist": double_space.sub(" ", (r.get("artist") or "")).strip(), + "song": double_space.sub(" ", (r.get("song") or "")).strip(), + "album": double_space.sub(" ", (r.get("album") or "")).strip(), + "genre": r.get("genre") if r.get("genre") else "Not Found", + "artistsong": double_space.sub(" ", (r.get("artistdashsong") or "")).strip(), + "file_path": r.get("file_path"), + "duration": r.get("duration"), + } + built.append(item) + except Exception: + logging.debug("Skipping malformed playlist entry for %s: %s", playlist, r) + + self.active_playlist[playlist] = built logging.info( "Populated playlist: %s with %s items", playlist, len(self.active_playlist[playlist]), ) """Dedupe""" - logging.info("Removing duplicate tracks...") - dedupe_processed = [] - for item in self.active_playlist[playlist]: - artistsongabc: str = non_alnum.sub("", item.get("artistsong", "")) - if not artistsongabc: - logging.info("Missing artistsong: %s", item) - continue - if artistsongabc in dedupe_processed: - self.active_playlist[playlist].remove(item) - dedupe_processed.append(artistsongabc) + if self.DEDUPE_PLAYLISTS: + logging.info("Removing duplicate tracks (by file_path only)...") + dedupe_processed: set[str] = set() + deduped_list: list[dict] = [] + for item in self.active_playlist[playlist]: + fp = item.get("file_path") + if not fp: + # If no file_path available, skip the item (can't dedupe reliably) + logging.info("Skipping item without file_path during dedupe: %s", item) + continue + key = fp + + if key in dedupe_processed: + continue + dedupe_processed.add(key) + deduped_list.append(item) + + self.active_playlist[playlist] = deduped_list + else: + logging.warning("Dupe removal disabled") logging.info( "Duplicates for playlist: %s removed. 
New playlist size: %s",
diff --git a/utils/rip_background.py b/utils/rip_background.py
index 229eea6..7e83176 100644
--- a/utils/rip_background.py
+++ b/utils/rip_background.py
@@ -12,9 +12,9 @@ from typing import Optional
 from urllib.parse import urlparse, unquote
 import aiohttp
 from datetime import datetime, timezone
-from mediafile import MediaFile  # type: ignore[import]
+from mediafile import MediaFile, Image, ImageType  # type: ignore[import]
 from rq import get_current_job
-from utils.sr_wrapper import SRUtil
+from utils.sr_wrapper import SRUtil, MetadataFetchError
 from dotenv import load_dotenv
 import re
@@ -126,6 +126,30 @@ def tag_with_mediafile(file_path: str, meta: dict):
             pass
     if release_date_obj:
         f.date = release_date_obj
+    # Attach album art if provided in meta (synchronous fallback)
+    try:
+        cover_bytes = meta.get("cover_bytes")
+        cover_url = None
+        if not cover_bytes:
+            cover_url = meta.get("cover_art_url") or meta.get("cover_url")
+
+        if not cover_bytes and cover_url:
+            try:
+                import requests
+                resp = requests.get(cover_url, timeout=10)
+                resp.raise_for_status()
+                cover_bytes = resp.content
+            except Exception:
+                cover_bytes = None
+
+        if cover_bytes:
+            try:
+                img = Image(cover_bytes, desc=None, type=ImageType.front)
+                f.images = [img]
+            except Exception:
+                pass
+    except Exception:
+        pass
     f.save()
@@ -154,6 +178,35 @@ def ensure_unique_path(p: Path) -> Path:
         counter += 1
 
+def ensure_unique_filename_in_dir(parent: Path, filename: str) -> Path:
+    """Return a Path in `parent` with a unique filename.
+
+    Handles multi-part extensions like `.tar.gz` so names become
+    `Name (2).tar.gz` instead of `Name.tar (2).tar.gz`.
+    """
+    parent.mkdir(parents=True, exist_ok=True)
+    # special-case .tar.gz
+    if filename.lower().endswith(".tar.gz"):
+        ext = ".tar.gz"
+        base = filename[:-len(ext)]
+    else:
+        p = Path(filename)
+        ext = p.suffix
+        base = p.stem
+
+    existing = {f.name for f in parent.iterdir() if f.is_file()}
+    candidate = f"{base}{ext}"
+    if candidate not in existing:
+        return parent / candidate
+
+    counter = 2
+    while True:
+        candidate = f"{base} ({counter}){ext}"
+        if candidate not in existing:
+            return parent / candidate
+        counter += 1
+
 # ---------- bulk_download ----------
 def bulk_download(track_list: list, quality: str = "FLAC"):
     """
@@ -197,6 +250,19 @@
     (ROOT_DIR / "completed").mkdir(parents=True, exist_ok=True)
     async with aiohttp.ClientSession(headers=HEADERS) as session:
+        # Set up a one-time rate-limit callback to notify on the first 429 seen by SRUtil
+        async def _rate_limit_notify(exc: Exception):
+            try:
+                send_log_to_discord(f"Rate limit observed while fetching metadata: {exc}", "WARNING", target)
+            except Exception:
+                pass
+
+        # attach callback and reset notified flag for this job run
+        try:
+            sr.on_rate_limit = _rate_limit_notify
+            sr._rate_limit_notified = False
+        except Exception:
+            pass
         total = len(track_list or [])
         for i, track_id in enumerate(track_list or []):
             track_info = {"track_id": str(track_id), "status": "Pending", "file_path": None, "error": None, "attempts": 0}
@@ -223,7 +290,23 @@
                         async for chunk in resp.content.iter_chunked(64 * 1024):
                             f.write(chunk)
-                    md = await
sr.get_metadata_by_track_id(track_id) or {} + try: + md = await sr.get_metadata_by_track_id(track_id) or {} + except MetadataFetchError as me: + # Permanent metadata failure — notify and continue (mark track failed) + msg = f"Metadata permanently failed for track {track_id}: {me}" + try: + send_log_to_discord(msg, "ERROR", target) + except Exception: + pass + track_info["status"] = "Failed" + track_info["error"] = str(me) + per_track_meta.append(track_info) + if job: + job.meta["tracks"] = per_track_meta + job.meta["progress"] = int(((i + 1) / total) * 100) + job.save_meta() + break artist_raw = md.get("artist") or "Unknown Artist" album_raw = md.get("album") or "Unknown Album" title_raw = md.get("title") or f"Track {track_id}" @@ -237,8 +320,104 @@ def bulk_download(track_list: list, quality: str = "FLAC"): album_dir.mkdir(parents=True, exist_ok=True) final_file = ensure_unique_path(album_dir / f"{title}{ext}") - tag_with_mediafile(str(tmp_file), md) + # Move file into final location first (tags will be updated on moved file) tmp_file.rename(final_file) + + # Try to fetch cover art via SRUtil (use album_id from metadata) + try: + album_field = md.get("album") + album_id = md.get("album_id") or (album_field.get("id") if isinstance(album_field, dict) else None) + except Exception: + album_id = None + + if album_id: + try: + cover_url = await sr.get_cover_by_album_id(album_id, size=640) + except Exception: + cover_url = None + else: + cover_url = md.get("cover_url") + + # Embed tags + artwork using music_tag if available, falling back to mediafile tagging + embedded = False + try: + if cover_url: + try: + timeout = aiohttp.ClientTimeout(total=15) + async with session.get(cover_url, timeout=timeout) as img_resp: + if img_resp.status == 200: + img_bytes = await img_resp.read() + else: + img_bytes = None + # Notify Discord about failed cover download (HTTP error) + try: + send_log_to_discord( + f"Cover download HTTP `{img_resp.status}` for track `{track_id} album_id={album_id} url={cover_url} artist={artist} album={album}`", + "WARNING", + target, + ) + except Exception: + pass + except Exception as e: + img_bytes = None + # Notify Discord about exception during cover download + try: + send_log_to_discord( + f"Cover download exception for track `{track_id} album_id={album_id} url={cover_url} artist={artist} album={album}`: `{e}`", + "WARNING", + target, + ) + except Exception: + pass + else: + img_bytes = None + + # Prefer music_tag if available (keeps compatibility with add_cover_art.py) + try: + from music_tag import load_file as mt_load_file # type: ignore + try: + mf = mt_load_file(str(final_file)) + # set basic tags + if md.get('title'): + mf['title'] = md.get('title') + if md.get('artist'): + mf['artist'] = md.get('artist') + if md.get('album'): + mf['album'] = md.get('album') + tracknum = md.get('track_number') + if tracknum is not None: + try: + mf['tracknumber'] = int(tracknum) + except Exception: + pass + if img_bytes: + mf['artwork'] = img_bytes + mf.save() + embedded = True + except Exception: + embedded = False + except Exception: + embedded = False + + # If music_tag not available or failed, fallback to mediafile tagging + if not embedded: + # If we had a cover_url but no bytes, log a warning to Discord + try: + if cover_url and not img_bytes: + send_log_to_discord( + f"Cover art not available for track {track_id} album_id={album_id} url={cover_url}", + "WARNING", + target, + ) + except Exception: + pass + tag_with_mediafile(str(final_file), md) + except Exception: + # Ensure at 
@@ -237,8 +320,104 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
                         album_dir.mkdir(parents=True, exist_ok=True)
                         final_file = ensure_unique_path(album_dir / f"{title}{ext}")

-                        tag_with_mediafile(str(tmp_file), md)
+                        # Move the file into its final location first (tags are written to the moved file)
                         tmp_file.rename(final_file)
+
+                        # Try to fetch cover art via SRUtil (use album_id from metadata)
+                        try:
+                            album_field = md.get("album")
+                            album_id = md.get("album_id") or (album_field.get("id") if isinstance(album_field, dict) else None)
+                        except Exception:
+                            album_id = None
+
+                        if album_id:
+                            try:
+                                cover_url = await sr.get_cover_by_album_id(album_id, size=640)
+                            except Exception:
+                                cover_url = None
+                        else:
+                            cover_url = md.get("cover_url")
+
+                        # Embed tags + artwork with music_tag if available, falling back to mediafile tagging
+                        embedded = False
+                        try:
+                            if cover_url:
+                                try:
+                                    timeout = aiohttp.ClientTimeout(total=15)
+                                    async with session.get(cover_url, timeout=timeout) as img_resp:
+                                        if img_resp.status == 200:
+                                            img_bytes = await img_resp.read()
+                                        else:
+                                            img_bytes = None
+                                            # Notify Discord about a failed cover download (HTTP error)
+                                            try:
+                                                send_log_to_discord(
+                                                    f"Cover download HTTP `{img_resp.status}` for track `{track_id} album_id={album_id} url={cover_url} artist={artist} album={album}`",
+                                                    "WARNING",
+                                                    target,
+                                                )
+                                            except Exception:
+                                                pass
+                                except Exception as e:
+                                    img_bytes = None
+                                    # Notify Discord about an exception during cover download
+                                    try:
+                                        send_log_to_discord(
+                                            f"Cover download exception for track `{track_id} album_id={album_id} url={cover_url} artist={artist} album={album}`: `{e}`",
+                                            "WARNING",
+                                            target,
+                                        )
+                                    except Exception:
+                                        pass
+                            else:
+                                img_bytes = None
+
+                            # Prefer music_tag if available (keeps compatibility with add_cover_art.py)
+                            try:
+                                from music_tag import load_file as mt_load_file  # type: ignore
+                                try:
+                                    mf = mt_load_file(str(final_file))
+                                    # Set the basic tags
+                                    if md.get('title'):
+                                        mf['title'] = md.get('title')
+                                    if md.get('artist'):
+                                        mf['artist'] = md.get('artist')
+                                    if md.get('album'):
+                                        mf['album'] = md.get('album')
+                                    tracknum = md.get('track_number')
+                                    if tracknum is not None:
+                                        try:
+                                            mf['tracknumber'] = int(tracknum)
+                                        except Exception:
+                                            pass
+                                    if img_bytes:
+                                        mf['artwork'] = img_bytes
+                                    mf.save()
+                                    embedded = True
+                                except Exception:
+                                    embedded = False
+                            except Exception:
+                                embedded = False
+
+                            # If music_tag is unavailable or failed, fall back to mediafile tagging
+                            if not embedded:
+                                # If we had a cover_url but no bytes, log a warning to Discord
+                                try:
+                                    if cover_url and not img_bytes:
+                                        send_log_to_discord(
+                                            f"Cover art not available for track {track_id} album_id={album_id} url={cover_url}",
+                                            "WARNING",
+                                            target,
+                                        )
+                                except Exception:
+                                    pass
+                                tag_with_mediafile(str(final_file), md)
+                        except Exception:
+                            # Ensure at least the basic tags are written
+                            try:
+                                tag_with_mediafile(str(final_file), md)
+                            except Exception:
+                                pass

                         tmp_file = None
                         track_info["status"] = "Success"
@@ -298,8 +477,16 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
                 artist = "Unknown Artist"
             artist_counts[artist] = artist_counts.get(artist, 0) + 1
         top_artist = sorted(artist_counts.items(), key=lambda kv: (-kv[1], kv[0]))[0][0] if artist_counts else "Unknown Artist"
-        combined_artist = sanitize_filename(top_artist)
-        staged_tarball = staging_root / f"{combined_artist}.tar.gz"
+        # Prefer `job.meta['target']` when provided by the enqueuer; fall back to the top artist.
+        target_name = None
+        try:
+            if job and job.meta:
+                target_name = job.meta.get("target")
+        except Exception:
+            target_name = None
+
+        base_label = sanitize_filename(target_name) if target_name else sanitize_filename(top_artist)
+        staged_tarball = staging_root / f"{base_label}.tar.gz"
         counter = 1
         base_name = staged_tarball.stem
@@ -307,8 +494,10 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
             counter += 1
             staged_tarball = staging_root / f"{base_name} ({counter}).tar.gz"

-        final_tarball = ROOT_DIR / "completed" / quality / staged_tarball.name
-        final_tarball.parent.mkdir(parents=True, exist_ok=True)
+        final_dir = ROOT_DIR / "completed" / quality
+        final_dir.mkdir(parents=True, exist_ok=True)
+        # Ensure we don't overwrite an existing final tarball; preserve the `.tar.gz` suffix.
+        final_tarball = ensure_unique_filename_in_dir(final_dir, staged_tarball.name)

         if job:
             job.meta["status"] = "Compressing"
@@ -317,7 +506,7 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
         logging.info("Creating tarball: %s", staged_tarball)
         await discord_notify(DISCORD_WEBHOOK,
                              title=f"Compressing: Job {job_id}",
-                             description=f"Creating tarball (`{len(track_list)}` track(s)).\nStaging path: {staged_tarball}",
+                             description=f"Creating tarball: `{len(all_final_files)}` track(s).\nStaging path: {staged_tarball}",
                              color=0xFFA500, target=target)
         try:
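The `ensure_unique_filename_in_dir` helper introduced earlier is what makes the tarball naming above safe; a short sketch of why naive `Path.stem`/`Path.suffix` splitting is not enough (the name here is illustrative):

from pathlib import Path

name = "Artist.tar.gz"
print(Path(name).stem, Path(name).suffix)   # "Artist.tar" ".gz": the naive split

if name.lower().endswith(".tar.gz"):        # the helper's special case
    base, ext = name[:-len(".tar.gz")], ".tar.gz"
    print(f"{base} (2){ext}")               # "Artist (2).tar.gz"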
@@ -366,10 +555,12 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
             job.save_meta()

         # Job completed Discord message
+        completed = len(all_final_files)
+        failed = len(track_list) - completed
         await discord_notify(
             DISCORD_WEBHOOK,
             title=f"Job Completed: {job_id}",
-            description=f"Processed `{len(track_list)}` track(s). Tarball: `{final_tarball}`",
+            description=f"Processed `{len(track_list)}` track(s).\nCompleted: `{completed}`\nFailed: `{failed}`\nTarball: `{final_tarball}`",
             target=target,
             color=0x00FF00
         )
diff --git a/utils/sr_wrapper.py b/utils/sr_wrapper.py
index 5f42431..79e3d64 100644
--- a/utils/sr_wrapper.py
+++ b/utils/sr_wrapper.py
@@ -1,18 +1,9 @@
-from typing import Optional, Any
+from typing import Optional, Any, Callable
 from uuid import uuid4
 from urllib.parse import urlparse
 import hashlib
 import traceback
 import logging
-# Suppress all logging output from this module and its children
-for name in [__name__, "utils.sr_wrapper"]:
-    logger = logging.getLogger(name)
-    logger.setLevel(logging.CRITICAL)
-    logger.propagate = False
-    for handler in logger.handlers:
-        handler.setLevel(logging.CRITICAL)
-# Also set the root logger to CRITICAL as a last resort (may affect global logging)
-logging.getLogger().setLevel(logging.CRITICAL)
 import random
 import asyncio
 import os
@@ -24,6 +15,21 @@ from dotenv import load_dotenv
 from rapidfuzz import fuzz


+class MetadataFetchError(Exception):
+    """Raised when a metadata fetch permanently fails after retries."""
+
+
+# Suppress all logging output from this module and its children
+for name in [__name__, "utils.sr_wrapper"]:
+    logger = logging.getLogger(name)
+    logger.setLevel(logging.CRITICAL)
+    logger.propagate = False
+    for handler in logger.handlers:
+        handler.setLevel(logging.CRITICAL)
+# Also set the root logger to CRITICAL as a last resort (may affect global logging)
+logging.getLogger().setLevel(logging.CRITICAL)
+
+
 load_dotenv()
@@ -65,6 +71,10 @@ class SRUtil:
         self.MAX_METADATA_RETRIES = 5
         self.METADATA_ALBUM_CACHE: dict[str, dict] = {}
         self.RETRY_DELAY = 1.0  # seconds between retries
+        # Callback invoked when a 429 is first observed. Signature: (Exception) -> None, sync or async.
+        self.on_rate_limit: Optional[Callable[[Exception], Any]] = None
+        # Internal flag to avoid repeated notifications within the same runtime
+        self._rate_limit_notified = False

     async def rate_limited_request(self, func, *args, **kwargs):
         async with self.METADATA_SEMAPHORE:
@@ -73,9 +83,70 @@ class SRUtil:
             if elapsed < self.METADATA_RATE_LIMIT:
                 await asyncio.sleep(self.METADATA_RATE_LIMIT - elapsed)
             result = await func(*args, **kwargs)
-            self.last_request_time = time.time()
+            self.LAST_METADATA_REQUEST = time.time()
             return result

+    async def _safe_api_call(self, func, *args, retries: int = 2, backoff: float = 0.5, **kwargs):
+        """Call an async API function with resilient retry behavior.
+
+        - On AttributeError: attempt a `login()` and retry.
+        - On connection-related errors (aiohttp.ClientError, OSError, timeouts):
+          attempt a `login()` and retry up to `retries` times.
+        - On 400/429 responses (message contains '400' or '429'): retry with backoff
+          without triggering a login (to avoid excessive logins).
+
+        Returns the result, or raises the last exception once retries are exhausted.
+        """
+        last_exc: Optional[Exception] = None
+        for attempt in range(retries):
+            try:
+                return await func(*args, **kwargs)
+            except AttributeError as e:
+                # Probably missing/closed client internals: try a re-login
+                last_exc = e
+                try:
+                    await self.streamrip_client.login()
+                except Exception:
+                    pass
+                continue
+            except Exception as e:
+                last_exc = e
+                msg = str(e)
+                # Treat 400/429 as transient rate-limit/server responses; retry without login
+                if ("400" in msg or "429" in msg) and attempt < retries - 1:
+                    # Notify on the first observed 429 (if a callback is set)
+                    try:
+                        if "429" in msg and not self._rate_limit_notified and self.on_rate_limit:
+                            self._rate_limit_notified = True
+                            try:
+                                if asyncio.iscoroutinefunction(self.on_rate_limit):
+                                    asyncio.create_task(self.on_rate_limit(e))
+                                else:
+                                    loop = asyncio.get_running_loop()
+                                    loop.run_in_executor(None, self.on_rate_limit, e)
+                            except Exception:
+                                pass
+                    except Exception:
+                        pass
+                    await asyncio.sleep(backoff * (2 ** attempt))
+                    continue
+
+                # Connection-related errors: try to re-login, then retry
+                if isinstance(e, (aiohttp.ClientError, OSError, ConnectionError, asyncio.TimeoutError)) or "Connection" in msg or "closed" in msg.lower():
+                    try:
+                        await self.streamrip_client.login()
+                    except Exception:
+                        pass
+                    if attempt < retries - 1:
+                        await asyncio.sleep(backoff * (2 ** attempt))
+                        continue
+
+                # Unhandled / permanent error: stop retrying and re-raise below
+                break
+
+        if last_exc:
+            raise last_exc
+        return None
+
     def is_fuzzy_match(self, expected, actual, threshold=80):
         if not expected or not actual:
             return False
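The retry cadence of `_safe_api_call` for transient 400/429 responses is plain exponential backoff; a one-liner to see the sleep schedule under the defaults used in this diff (sleeps happen only between attempts, hence `retries - 1` entries):

backoff, retries = 0.5, 3
delays = [backoff * (2 ** attempt) for attempt in range(retries - 1)]
print(delays)  # [0.5, 1.0]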
+ """ + last_exc: Optional[Exception] = None + for attempt in range(retries): + try: + return await func(*args, **kwargs) + except AttributeError as e: + # Probably missing/closed client internals: try re-login once + last_exc = e + try: + await self.streamrip_client.login() + except Exception: + pass + continue + except Exception as e: + last_exc = e + msg = str(e) + # Treat 400/429 as transient rate-limit/server responses — retry without login + if ("400" in msg or "429" in msg) and attempt < retries - 1: + # Notify on the first observed 429 (if a callback is set) + try: + if "429" in msg and not self._rate_limit_notified and self.on_rate_limit: + self._rate_limit_notified = True + try: + if asyncio.iscoroutinefunction(self.on_rate_limit): + asyncio.create_task(self.on_rate_limit(e)) + else: + loop = asyncio.get_running_loop() + loop.run_in_executor(None, self.on_rate_limit, e) + except Exception: + pass + except Exception: + pass + await asyncio.sleep(backoff * (2 ** attempt)) + continue + + # Connection related errors — try to re-login then retry + if isinstance(e, (aiohttp.ClientError, OSError, ConnectionError, asyncio.TimeoutError)) or "Connection" in msg or "closed" in msg.lower(): + try: + await self.streamrip_client.login() + except Exception: + pass + if attempt < retries - 1: + await asyncio.sleep(backoff * (2 ** attempt)) + continue + + # Unhandled / permanent error: re-raise after loop ends + # If we reach here, raise the last exception + if last_exc: + raise last_exc + return None + def is_fuzzy_match(self, expected, actual, threshold=80): if not expected or not actual: return False @@ -95,6 +166,65 @@ class SRUtil: deduped[norm] = entry return list(deduped.values()) + def group_artists_by_name(self, entries: list[dict], query: Optional[str] = None) -> list[dict]: + """ + Group artist entries by normalized display name and pick a primary candidate per name. + + Returns a list of dicts where each dict contains the primary candidate plus + an `alternatives` list for other artists that share the same display name. + + Scoring/selection policy: + - If `query` is provided, prefer an exact case-insensitive match. + - Otherwise prefer the entry with highest fuzzy match to `query`. + - Use `popularity` as a tiebreaker. + + This keeps a single line in an autocomplete dropdown while preserving the + alternate choices (IDs) so the UI can show a submenu or a secondary picker. 
+ """ + buckets: dict[str, list[dict]] = {} + for e in entries: + name = e.get("artist", "") + norm = name.strip().lower() + buckets.setdefault(norm, []).append(e) + + out: list[dict] = [] + for norm, items in buckets.items(): + if len(items) == 1: + primary = items[0] + alternatives: list[dict] = [] + else: + # Score each item + scored = [] + for it in items: + score = 0.0 + if query: + try: + if it.get("artist", "").strip().lower() == query.strip().lower(): + score += 1000.0 + else: + score += float(fuzz.token_set_ratio(query, it.get("artist", ""))) + except Exception: + score += 0.0 + # add small weight for popularity if present + pop = it.get("popularity") or 0 + try: + score += float(pop) / 100.0 + except Exception: + pass + scored.append((score, it)) + scored.sort(key=lambda x: x[0], reverse=True) + primary = scored[0][1] + alternatives = [it for _, it in scored[1:]] + + out.append({ + "artist": primary.get("artist"), + "id": primary.get("id"), + "popularity": primary.get("popularity"), + "alternatives": alternatives, + }) + + return out + def format_duration(self, seconds): if not seconds: return None @@ -179,22 +309,23 @@ class SRUtil: for t in album_json.get("tracks", []) ] - async def get_artists_by_name(self, artist_name: str) -> Optional[list]: - """Get artist(s) by name. Retry login only on authentication failure. Rate limit and retry on 400/429.""" - import asyncio + async def get_artists_by_name(self, artist_name: str, group: bool = False) -> Optional[list]: + """Get artist(s) by name. + + Args: + artist_name: query string to search for. + group: if True return grouped results (one primary per display name with + `alternatives` list). If False return raw search items (legacy shape). + + Retry login only on authentication failure. Rate limit and retry on 400/429. 
+ """ artists_out: list[dict] = [] max_retries = 4 delay = 1.0 for attempt in range(max_retries): try: - artists = await self.streamrip_client.search( - media_type="artist", query=artist_name - ) + artists = await self._safe_api_call(self.streamrip_client.search, media_type="artist", query=artist_name, retries=3) break - except AttributeError: - await self.streamrip_client.login() - if attempt == max_retries - 1: - return None except Exception as e: msg = str(e) if ("400" in msg or "429" in msg) and attempt < max_retries - 1: @@ -205,18 +336,30 @@ class SRUtil: return None else: return None - artists = artists[0].get("items", []) + # `artists` can be None or a list of result pages — guard accordingly if not artists: return None + # If the client returned paged results (list), pick first page dict + if isinstance(artists, list): + artists_page = artists[0] if len(artists) > 0 else {} + else: + artists_page = artists + artists_items = artists_page.get("items", []) if isinstance(artists_page, dict) else [] + if not artists_items: + return None artists_out = [ { "artist": res["name"], "id": res["id"], + "popularity": res.get("popularity", 0), } - for res in artists + for res in artists_items if "name" in res and "id" in res ] - artists_out = self.dedupe_by_key("artist", artists_out) # Remove duplicates + + if group: + return self.group_artists_by_name(artists_out, query=artist_name) + return artists_out async def get_albums_by_artist_id(self, artist_id: int) -> Optional[list | dict]: @@ -228,14 +371,8 @@ class SRUtil: delay = 1.0 for attempt in range(max_retries): try: - metadata = await self.streamrip_client.get_metadata( - item_id=artist_id_str, media_type="artist" - ) + metadata = await self._safe_api_call(self.streamrip_client.get_metadata, artist_id_str, "artist", retries=3) break - except AttributeError: - await self.streamrip_client.login() - if attempt == max_retries - 1: - return None except Exception as e: msg = str(e) if ("400" in msg or "429" in msg) and attempt < max_retries - 1: @@ -300,12 +437,9 @@ class SRUtil: album_id_str: str = str(album_id) for attempt in range(2): try: - metadata = await self.streamrip_client.get_metadata( - item_id=album_id_str, media_type="album" - ) + metadata = await self._safe_api_call(self.streamrip_client.get_metadata, item_id=album_id_str, media_type="album", retries=2) break - except AttributeError: - await self.streamrip_client.login() + except Exception: if attempt == 1: return None else: @@ -329,10 +463,11 @@ class SRUtil: Optional[list[dict]]: List of tracks or None if not found. """ album_id_str = str(album_id) - await self.streamrip_client.login() - metadata = await self.streamrip_client.get_metadata( - item_id=album_id_str, media_type="album" - ) + try: + metadata = await self._safe_api_call(self.streamrip_client.get_metadata, item_id=album_id_str, media_type="album", retries=2) + except Exception as e: + logging.warning("get_tracks_by_album_id failed: %s", e) + return None if not metadata: logging.warning("No metadata found for album ID: %s", album_id) return None @@ -360,21 +495,16 @@ class SRUtil: Optional[dict]: The track details or None if not found. 
@@ -360,21 +495,16 @@ class SRUtil:
             Optional[dict]: The track details or None if not found.
         TODO: Reimplement using StreamRip
         """
-        if not self.streamrip_client.logged_in:
-            await self.streamrip_client.login()
-
         try:
-            search_res = await self.streamrip_client.search(media_type="track",
-                                                            query=f"{artist} - {song}",
-                                                            )
+            search_res = await self._safe_api_call(self.streamrip_client.search, media_type="track", query=f"{artist} - {song}", retries=3)
             logging.critical("Result: %s", search_res)
-            return search_res[0].get('items')
+            return search_res[0].get('items') if search_res and isinstance(search_res, list) else []
         except Exception as e:
             traceback.print_exc()
             logging.critical("Search Exception: %s", str(e))
             if n < 3:
-                n+=1
+                n += 1
                 return await self.get_tracks_by_artist_song(artist, song, n)
-        finally:
             return []
         # return []
@@ -399,18 +529,13 @@ class SRUtil:
             quality_int = 1
         track_id_str: str = str(track_id)

-        await self.streamrip_client.login()
-
+        # _safe_api_call handles re-login when needed; no unconditional login here
         try:
             logging.critical("Using quality_int: %s", quality_int)
-            track = await self.streamrip_client.get_downloadable(
-                track_id=track_id_str, quality=quality_int
-            )
-        except AttributeError:
-            await self.streamrip_client.login()
-            track = await self.streamrip_client.get_downloadable(
-                track_id=track_id_str, quality=quality_int
-            )
+            track = await self._safe_api_call(self.streamrip_client.get_downloadable, track_id=track_id_str, quality=quality_int, retries=3)
+        except Exception as e:
+            logging.warning("get_stream_url_by_track_id failed: %s", e)
+            return None
         if not track:
             logging.warning("No track found for ID: %s", track_id)
             return None
@@ -427,8 +552,7 @@ class SRUtil:
         """
         for attempt in range(1, self.MAX_METADATA_RETRIES + 1):
             try:
-                await self.streamrip_client.login()
-
+                await self._safe_api_call(self.streamrip_client.login, retries=1)
                 # Track metadata
                 metadata = await self.rate_limited_request(
                     self.streamrip_client.get_metadata, str(track_id), "track"
                 )
@@ -443,7 +567,7 @@ class SRUtil:
                     album_metadata = self.METADATA_ALBUM_CACHE[album_id]
                 else:
                     album_metadata = await self.rate_limited_request(
-                        self.streamrip_client.get_metadata, album_id, "album"
+                        lambda i, t: self._safe_api_call(self.streamrip_client.get_metadata, i, t, retries=2), album_id, "album"
                     )
                     if not album_metadata:
                         return None
@@ -456,6 +580,9 @@ class SRUtil:
                     album_metadata, metadata
                 )
+                # Include the album id so callers can fetch cover art if desired
+                combined_metadata["album_id"] = album_id
+
                 logging.info(
                     "Combined metadata for track ID %s (attempt %d): %s",
                     track_id,
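One detail worth noting above: `rate_limited_request` awaits `func(*args)`, so the lambda adapts `_safe_api_call`'s `(func, *args, retries=...)` shape into the plain `(item_id, media_type)` call the rate limiter expects. A stripped-down sketch of that composition (rate limiting and retry bodies omitted; every name here is a stand-in):

import asyncio

async def rate_limited_request(func, *args):
    return await func(*args)          # semaphore/spacing omitted in this sketch

async def safe_api_call(func, *args, retries=2):
    return await func(*args)          # retry/backoff omitted in this sketch

async def get_metadata(item_id, media_type):
    return {"id": item_id, "type": media_type}

async def main():
    md = await rate_limited_request(
        lambda i, t: safe_api_call(get_metadata, i, t, retries=2), "42", "album"
    )
    print(md)  # {'id': '42', 'type': 'album'}

asyncio.run(main())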
@@ -483,7 +610,10 @@ class SRUtil:
                     track_id,
                     self.MAX_METADATA_RETRIES,
                 )
-                return None
+                # Raise a specific exception so callers can react (e.g. notify)
+                raise MetadataFetchError(f"Metadata fetch failed permanently for track {track_id} after {self.MAX_METADATA_RETRIES} attempts: {e}")
+        # If we reach here without returning, raise a generic metadata error
+        raise MetadataFetchError(f"Metadata fetch failed for track {track_id}")

     async def download(self, track_id: int, quality: str = "LOSSLESS") -> bool | str:
@@ -495,7 +625,7 @@ class SRUtil:
             bool
         """
         try:
-            await self.streamrip_client.login()
+            await self._safe_api_call(self.streamrip_client.login, retries=1)
             track_url = await self.get_stream_url_by_track_id(track_id)
             if not track_url:
                 return False
@@ -507,6 +637,12 @@ class SRUtil:
                 f"{self.streamrip_config.session.downloads.folder}/{unique}"
             )
             dl_path = f"{dl_folder_path}/{track_id}.{parsed_url_ext}"
+            # Ensure the download folder exists
+            try:
+                os.makedirs(dl_folder_path, exist_ok=True)
+            except Exception:
+                pass
+
             async with aiohttp.ClientSession() as session:
                 async with session.get(
                     track_url, headers={}, timeout=aiohttp.ClientTimeout(total=60)