# api/test/add_cover_art.py

# Standard library
import os
import sys
import re
import csv
import asyncio
import logging
import traceback

# Third-party
import aiohttp
from pathlib import Path
from dotenv import load_dotenv
from rapidfuzz import fuzz
from music_tag import load_file  # type: ignore
from rich.console import Console
from rich.table import Table
from rich.progress import (
    Progress,
    BarColumn,
    TextColumn,
    TimeElapsedColumn,
    TaskProgressColumn,
)

# Local imports
sys.path.insert(0, "..")
from utils.sr_wrapper import SRUtil

import musicbrainzngs  # type: ignore
from discogs_client import Client  # type: ignore

# typing helper
from typing import Any, cast, Optional

# Optional: use the popular `itunespy` PyPI package when available
try:
    import itunespy  # type: ignore

    HAVE_ITUNESPY = True
except Exception:
    itunespy = None
    HAVE_ITUNESPY = False

# Optional: use `spotipy` when available for Spotify lookups
try:
    import spotipy  # type: ignore

    HAVE_SPOTIPY = True
except Exception:
    spotipy = None
    HAVE_SPOTIPY = False
# Reminder: If you see 'Import "music_tag" could not be resolved', run:
#   uv add music-tag

# Configurable paths and extensions
# Directory of music files. NOTE(review): the code that walks it is not
# visible in this part of the file; presumably the library under review.
MUSIC_DIR = Path("/storage/music2/completed/FLAC/review")
# File suffixes (lower-case, with leading dot) listed as audio formats.
AUDIO_EXTS = {".flac", ".mp3", ".m4a", ".ogg", ".wav", ".aac"}
# Name of the CSV report file. NOTE(review): the writer is not visible here.
REPORT_CSV = "cover_art_report.csv"
# Module-level cache, initially empty.
# NOTE(review): presumably keyed by album; confirm against the code using it.
ALBUM_ART_CACHE: dict = {}


# Reminder: If you see 'Import "music_tag" could not be resolved', run:
#   uv add music-tag
async def search_musicbrainz_cover(
    artist, album, session: aiohttp.ClientSession, limiter: "AsyncRateLimiter"
):
    """Look up front cover art via MusicBrainz and the Cover Art Archive.

    Searches MusicBrainz release groups for ``artist``/``album`` (the blocking
    ``musicbrainzngs`` call runs in a worker thread), then walks the releases
    listed on each candidate and requests the 500px front image from the
    Cover Art Archive.

    Args:
        artist: Artist name interpolated into the search query.
        album: Album/release name interpolated into the search query.
        session: aiohttp session used for the Cover Art Archive requests.
        limiter: Rate limiter awaited before each Cover Art Archive request.

    Returns:
        The body of the first cover request answered with HTTP 200, or
        ``None`` when no cover is found or an error occurs.
    """
    try:
        query = f"artist:{artist} AND release:{album}"
        try:
            # Second positional argument is the result limit.
            res = await asyncio.to_thread(
                musicbrainzngs.search_release_groups, query, 5
            )
        except Exception as exc:
            # A failed search is treated as "no candidates", but the reason
            # is surfaced in debug mode instead of being dropped silently.
            res = {}
            if COVER_DEBUG_QUERIES:
                console.print(
                    f"[cyan][DEBUG] MusicBrainz search failed: {exc}[/cyan]"
                )
        rgs = res.get("release-group-list") or []
        if COVER_DEBUG_QUERIES:
            try:
                dbg_info = [
                    {
                        "id": rg.get("id"),
                        "title": rg.get("title"),
                        "artist": artist_credit_to_name(
                            rg.get("artist-credit", [])
                        ),
                    }
                    for rg in rgs[:3]
                ]
                console.print(
                    f"[cyan][DEBUG] MusicBrainz top candidates: {dbg_info}[/cyan]"
                )
            except Exception:
                # Debug output must never break the lookup itself.
                pass
        timeout = aiohttp.ClientTimeout(total=15)
        for rg in rgs:
            # Try every release of the group until one has a front cover.
            for rel in rg.get("release-list") or []:
                relid = rel.get("id")
                if not relid:
                    continue
                caa_url = f"https://coverartarchive.org/release/{relid}/front-500"
                try:
                    await limiter.acquire()
                    async with session.get(caa_url, timeout=timeout) as resp:
                        if resp.status == 200:
                            return await resp.read()
                except Exception:
                    # Network/timeout problems on one release: try the next.
                    continue
        return None
    except Exception as e:
        console.print(f"[red]MusicBrainz search exception: {e}[/red]")
        return None


async def search_discogs_cover(
    artist, album, session: aiohttp.ClientSession, limiter: "AsyncRateLimiter"
):
    """Look up cover art for ``artist``/``album`` through the Discogs API.

    Does nothing when ``DISCOGS_TOKEN`` is not configured. Otherwise searches
    releases by album title filtered by artist, falls back to a combined
    normalized free-text query when that yields nothing, and downloads the
    first candidate image that answers HTTP 200.

    Args:
        artist: Artist name.
        album: Album/release title.
        session: aiohttp session used to download the image.
        limiter: Rate limiter awaited before each Discogs search and download.

    Returns:
        Image bytes, or ``None`` when nothing usable is found.
    """
    try:
        if not DISCOGS_TOKEN:
            return None

        from itertools import islice

        def _search(*terms, **fields):
            # Client.search takes free-text terms positionally and filters as
            # keyword arguments. The result is a lazily paginated list, so a
            # bounded number of hits is materialized here, inside the worker
            # thread, to keep blocking page fetches off the event loop.
            return list(islice(discogs_client.search(*terms, **fields), 10))

        try:
            await limiter.acquire()
            if COVER_DEBUG_QUERIES:
                console.print(
                    f"[cyan][DEBUG] Discogs query: album='{album}' artist='{artist}'"
                )
            results = await asyncio.to_thread(
                _search, album, artist=artist, type="release"
            )
        except Exception:
            results = []
        if COVER_DEBUG_QUERIES:
            try:
                dbg = []
                for rr in results[:3]:
                    try:
                        data = getattr(rr, "data", {}) or {}
                        dbg.append(
                            {
                                "id": data.get("id"),
                                "title": data.get("title")
                                or getattr(rr, "title", None),
                                "cover_image": data.get("cover_image"),
                            }
                        )
                    except Exception:
                        continue
                console.print(f"[cyan][DEBUG] Discogs candidates: {dbg}[/cyan]")
            except Exception:
                # Debug output must never break the lookup itself.
                pass
        if not results:
            # Conservative normalized fallback: one combined free-text query.
            try:
                await limiter.acquire()
                combined = f"{normalize_name(artist)} {normalize_name(album)}"
                if COVER_DEBUG_QUERIES:
                    console.print(f"[cyan][DEBUG] Discogs fallback query: {combined}")
                results = await asyncio.to_thread(_search, combined, type="release")
            except Exception:
                results = []
        timeout = aiohttp.ClientTimeout(total=15)
        for r in results:
            # The raw payload may carry 'cover_image' or an 'images' list.
            try:
                data = r.data or {}
            except Exception:
                data = {}
            cover = data.get("cover_image")
            if not cover:
                imgs = data.get("images") or []
                if imgs and isinstance(imgs, list) and imgs[0].get("uri"):
                    cover = imgs[0].get("uri")
            if not cover:
                continue
            try:
                await limiter.acquire()
                async with session.get(cover, timeout=timeout) as resp:
                    if resp.status == 200:
                        return await resp.read()
            except Exception:
                # Download problem for this candidate: try the next one.
                continue
        return None
    except Exception as e:
        console.print(f"[red]Discogs search exception: {e}[/red]")
        return None


# Load env once
load_dotenv()

# Console for pretty output
console = Console()


def _env_flag(name):
    """Return True when env var `name` is '1', 'true' or 'yes' (any case)."""
    return os.getenv(name, "").lower() in ("1", "true", "yes")


# When enabled, run only Spotify searches (useful for quick testing)
ONLY_SPOTIFY = _env_flag("ONLY_SPOTIFY")
# When enabled, print query strings and brief response info for debugging
COVER_DEBUG_QUERIES = _env_flag("COVER_DEBUG_QUERIES")
# When enabled, use more aggressive fuzzy thresholds and extra fallbacks
COVER_AGGRESSIVE = _env_flag("COVER_AGGRESSIVE")


def _log_attempt(artist, album, title, source, result):
    """Log a single, clean attempt line to console and to `search_attempts.log`.

    result should be a short string like 'Success', 'No match', 'Timeout', or an error message.
    """
    try:
        a = artist or "Unknown Artist"
        al = album or "Unknown Album"
        t = title or "Unknown Title"
        line = f"SEARCH: {a} - {al} / {t} | Source: {source} | Result: {result}"
        console.print(line)
        try:
            with open("search_attempts.log", "a", encoding="utf-8") as lf:
                lf.write(line + "\n")
        except Exception:
            pass
    except Exception:
        # Never crash logging
        pass


# Define a lightweight async rate limiter
class AsyncRateLimiter:
    """Space out callers so acquisitions are at least `rate_seconds` apart.

    Callers are serialized through an asyncio lock; each one sleeps for
    whatever remains of the interval since the previous acquisition.
    """

    def __init__(self, rate_seconds: float):
        """Store the minimum interval (seconds) between two acquisitions."""
        self._rate = float(rate_seconds)
        self._lock = asyncio.Lock()
        # Loop-clock timestamp of the last acquisition; 0.0 lets the first
        # caller through without waiting.
        self._last = 0.0

    async def acquire(self) -> None:
        """Wait until the interval since the last acquisition has elapsed."""
        # get_running_loop() is the documented way to reach the loop from a
        # coroutine; look it up once and reuse it for both clock reads.
        loop = asyncio.get_running_loop()
        async with self._lock:
            wait = self._rate - (loop.time() - self._last)
            if wait > 0:
                await asyncio.sleep(wait)
            self._last = loop.time()


# Initialize MusicBrainz client: register the application name, version and
# contact that musicbrainzngs sends with its requests.
# NOTE(review): the contact address looks like a placeholder; confirm a real
# one is configured before running against the live service.
musicbrainzngs.set_useragent("cover-art-script", "1.0", "your-email@example.com")

# Initialize Discogs client. The token comes from the environment and may be
# None; the client object is created either way, and search_discogs_cover
# checks DISCOGS_TOKEN before using it.
DISCOGS_TOKEN = os.getenv("DISCOGS_TOKEN")
discogs_client = Client("cover-art-script/1.0", user_token=DISCOGS_TOKEN)


# Debug helper: print the relevant parts of an API response
async def log_api_response(api_name, response):
    """Print a condensed view of an API's JSON response for debugging.

    For "MusicBrainz", "Discogs" and "iTunes" only a few identifying fields
    per result are printed; for any other `api_name` the whole decoded payload
    is printed. Decoding or formatting errors are reported on the console.
    """
    try:
        payload = await response.json()

        def _pick(item, *keys):
            # Keep only the named fields, in the given order.
            return {key: item.get(key) for key in keys}

        if api_name == "MusicBrainz":
            relevant_info = [
                {
                    "id": group.get("id"),
                    "title": group.get("title"),
                    "artist": artist_credit_to_name(group.get("artist-credit", [])),
                }
                for group in payload.get("release-groups", [])
            ]
        elif api_name == "Discogs":
            relevant_info = [
                _pick(hit, "id", "title", "cover_image")
                for hit in payload.get("results", [])
            ]
        elif api_name == "iTunes":
            relevant_info = [
                _pick(hit, "collectionId", "collectionName", "artworkUrl100")
                for hit in payload.get("results", [])
            ]
        else:
            console.print(f"[cyan][DEBUG] {api_name} response: {payload}[/cyan]")
            return
        console.print(
            f"[cyan][DEBUG] {api_name} relevant response: {relevant_info}[/cyan]"
        )
    except Exception as e:
        console.print(f"[red][DEBUG] Failed to parse {api_name} response: {e}[/red]")

# Helper to strip common parenthetical tags from album names
# One trailing "(tag)" group, e.g. "(Deluxe)", "(Remastered)", "(1999)".
_ALBUM_TAG_RE = re.compile(
    r"\s*\((deluxe|remaster(ed)?|original mix|expanded|bonus|edition|version|mono|stereo|explicit|clean|anniversary|special|reissue|expanded edition|bonus track(s)?|international|digital|single|ep|live|instrumental|karaoke|radio edit|explicit version|clean version|acoustic|demo|re-recorded|remix|mix|edit|feat\.?|featuring|with .+|from .+|soundtrack|ost|score|session|vol(ume)? ?\d+|disc ?\d+|cd ?\d+|lp ?\d+|vinyl|202[0-9]|20[0-1][0-9]|19[0-9][0-9])\)$",
    re.IGNORECASE,
)


def strip_album_tags(album):
    """Drop one common parenthetical tag from the end of an album name.

    The match is case-insensitive and only applies when the whole
    parenthesized text is a known tag; the result is whitespace-stripped.
    """
    without_tag = _ALBUM_TAG_RE.sub("", album)
    return without_tag.strip()


# Helper to strip common trailing tags like EP, LP, Single, Album, etc. from album names
def strip_album_suffix(album):
    """Remove one trailing tag such as ' EP', ' - Single' or ' 1999'.

    The tag must be separated from the rest of the name by at least one
    space, hyphen, underscore or colon. Requiring a separator keeps words
    that merely end in a tag ("Help", "Sleep") and names that consist only of
    a tag ("1984") intact. Matching is case-insensitive; the result is
    whitespace-stripped.
    """
    suffix_pattern = r"[\s\-_:]+(ep|lp|single|album|remix|version|edit|mix|deluxe|expanded|anniversary|reissue|instrumental|karaoke|ost|score|session|mono|stereo|explicit|clean|bonus|disc ?\d+|cd ?\d+|vinyl|202[0-9]|20[0-1][0-9]|19[0-9][0-9])$"
    return re.sub(suffix_pattern, "", album, flags=re.IGNORECASE).strip()


# iTunes/Apple Music API fallback (async)
async def search_itunes_cover(
    session: aiohttp.ClientSession, artist, album, limiter: "AsyncRateLimiter"
):
    """Find album artwork through the `itunespy` package.

    Skips (with a console note) when itunespy is not installed. Searches for
    "artist album", retries once with normalized names when nothing comes
    back, and downloads the artwork of the first hit, asking for the 600x600
    rendition when the URL is the 100x100 one.

    Returns the image bytes, or None when nothing is found or a step fails.
    """
    if not HAVE_ITUNESPY:
        console.print(
            f"[yellow]iTunes: itunespy not available; skipping iTunes album search for '{artist} - {album}'[/yellow]"
        )
        return None

    try:
        itunes = cast(Any, itunespy)

        def _find_albums(term):
            # Probe the itunespy entry points in order; any failure means
            # "no results".
            try:
                if hasattr(itunes, "search_album"):
                    return itunes.search_album(term)
                if hasattr(itunes, "Album") and hasattr(itunes.Album, "search"):
                    return itunes.Album.search(term)
                if hasattr(itunes, "search"):
                    return itunes.search(term, entity="album")
            except Exception:
                pass
            return None

        def _field(candidate, key):
            # Read `key` as an attribute first, then as a dict entry.
            try:
                return getattr(candidate, key, None) or (
                    candidate.get(key) if isinstance(candidate, dict) else None
                )
            except Exception:
                return None

        albums = await asyncio.to_thread(_find_albums, f"{artist} {album}")
        if COVER_DEBUG_QUERIES and albums:
            try:
                dbg = [
                    {
                        "id": _field(candidate, "collectionId"),
                        "name": _field(candidate, "collectionName"),
                    }
                    for candidate in albums[:3]
                ]
                console.print(f"[cyan][DEBUG] iTunes album candidates: {dbg}[/cyan]")
            except Exception:
                pass
        if not albums:
            if COVER_DEBUG_QUERIES:
                console.print(
                    f"[cyan][DEBUG] iTunes album: no results for '{artist} - {album}', trying normalized fallback"
                )
            norm_q = f"{normalize_name(artist)} {normalize_name(album)}"
            albums = await asyncio.to_thread(_find_albums, norm_q)
            if not albums:
                return None

        top_hit = albums[0]
        art_url = getattr(top_hit, "artwork_url", None)
        if not art_url:
            art_url = getattr(top_hit, "artworkUrl100", None)
        if not art_url:
            return None

        # Normalize to higher-res if possible
        if "100x100" in art_url:
            art_url = art_url.replace("100x100bb", "600x600bb")

        await limiter.acquire()
        img_timeout = aiohttp.ClientTimeout(total=15)
        try:
            async with session.get(art_url, timeout=img_timeout) as img_resp:
                if img_resp.status == 200:
                    return await img_resp.read()
        except Exception:
            return None
    except Exception as e:
        console.print(f"[red][ERROR] itunespy album search exception: {e}[/red]")
    return None


async def search_itunes_track(
    session: aiohttp.ClientSession, artist, title, limiter: "AsyncRateLimiter"
):
    """Find artwork for a single track through the `itunespy` package.

    Skips (with a console note) when itunespy is not installed. Searches for
    "artist title", retries once with normalized names when nothing comes
    back, and downloads the artwork of the first hit, asking for the 600x600
    rendition when the URL is the 100x100 one.

    Returns the image bytes, or None when nothing is found or a step fails.
    """
    if not HAVE_ITUNESPY:
        console.print(
            f"[yellow]iTunes: itunespy not available; skipping iTunes track search for '{artist} - {title}'[/yellow]"
        )
        return None

    try:
        itunes = cast(Any, itunespy)

        def _find_tracks(term):
            # Probe the itunespy entry points in order; any failure means
            # "no results".
            try:
                if hasattr(itunes, "search_track"):
                    return itunes.search_track(term)
                if hasattr(itunes, "Track") and hasattr(itunes.Track, "search"):
                    return itunes.Track.search(term)
                if hasattr(itunes, "search"):
                    return itunes.search(term, entity="song")
            except Exception:
                pass
            return None

        tracks = await asyncio.to_thread(_find_tracks, f"{artist} {title}")
        if not tracks:
            if COVER_DEBUG_QUERIES:
                console.print(
                    f"[cyan][DEBUG] iTunes track: no results for '{artist} - {title}', trying normalized fallback"
                )
            norm_q = f"{normalize_name(artist)} {normalize_name(title)}"
            tracks = await asyncio.to_thread(_find_tracks, norm_q)
            if not tracks:
                return None

        top_hit = tracks[0]
        art_url = getattr(top_hit, "artwork_url", None)
        if not art_url:
            art_url = getattr(top_hit, "artworkUrl100", None)
        if not art_url:
            return None
        # Ask for the larger rendition when the URL is the 100x100 one.
        if "100x100" in art_url:
            art_url = art_url.replace("100x100bb", "600x600bb")

        await limiter.acquire()
        img_timeout = aiohttp.ClientTimeout(total=15)
        try:
            async with session.get(art_url, timeout=img_timeout) as img_resp:
                if img_resp.status == 200:
                    return await img_resp.read()
        except Exception:
            return None
    except Exception as e:
        console.print(f"[red][ERROR] itunespy track search exception: {e}[/red]")
    return None


async def search_deezer_cover(
    session: aiohttp.ClientSession, artist, album, limiter: "AsyncRateLimiter"
):
    """Search Deezer for an album cover. Uses Deezer public API (no auth).

    Queries the album search endpoint with "artist album", retries once with
    normalized names when the first query has no hits, and downloads the
    largest cover offered by the first hit (cover_xl, then cover_big, then
    cover).

    Args:
        session: aiohttp session used for every request.
        artist: Artist name.
        album: Album title.
        limiter: Rate limiter awaited before each HTTP request, including the
            fallback search.

    Returns:
        Image bytes, or None when nothing is found or any step fails.
    """
    from urllib.parse import quote

    async def _search_albums(term):
        # Returns the list of hits, or None when Deezer answers non-200.
        url = f"https://api.deezer.com/search/album?q={quote(term)}&limit=1"
        await limiter.acquire()
        async with session.get(url, timeout=aiohttp.ClientTimeout(total=10)) as resp:
            if resp.status != 200:
                return None
            data = await resp.json()
        return data.get("data") or []

    try:
        query = f"{artist} {album}"
        if COVER_DEBUG_QUERIES:
            console.print(f"[cyan][DEBUG] Deezer query: {query}")
        items = await _search_albums(query)
        if items is None:
            return None
        if COVER_DEBUG_QUERIES:
            try:
                dbg = [
                    {
                        "id": it.get("id"),
                        "title": it.get("title"),
                        "cover_xl": it.get("cover_xl"),
                        "cover_big": it.get("cover_big"),
                    }
                    for it in items[:3]
                ]
                console.print(f"[cyan][DEBUG] Deezer candidates: {dbg}[/cyan]")
            except Exception:
                # Debug output must never break the lookup itself.
                pass
        if not items:
            # Conservative normalized fallback.
            norm_q = f"{normalize_name(artist)} {normalize_name(album)}"
            if COVER_DEBUG_QUERIES:
                console.print(f"[cyan][DEBUG] Deezer fallback query: {norm_q}")
            items = await _search_albums(norm_q)
            if not items:
                return None
        first = items[0]
        # Prefer XL or big covers.
        art_url = first.get("cover_xl") or first.get("cover_big") or first.get("cover")
        if not art_url:
            return None
        # The search response is already released here, so the image download
        # does not hold a second connection open.
        await limiter.acquire()
        img_timeout = aiohttp.ClientTimeout(total=15)
        async with session.get(art_url, timeout=img_timeout) as img_resp:
            if img_resp.status == 200:
                return await img_resp.read()
    except Exception as exc:
        if COVER_DEBUG_QUERIES:
            console.print(f"[cyan][DEBUG] Deezer lookup failed: {exc}[/cyan]")
        return None
    return None


async def search_lastfm_cover(
    session: aiohttp.ClientSession, artist, album, limiter: "AsyncRateLimiter"
):
    """Search Last.fm for album cover using album.getInfo. Requires LASTFM_API_KEY in env.

    Args:
        session: aiohttp session used for the API call and the download.
        artist: Artist name sent as the `artist` parameter.
        album: Album title sent as the `album` parameter.
        limiter: Rate limiter awaited before each HTTP request.

    Returns:
        Image bytes of the largest available rendition (mega, extralarge,
        large, medium, in that order), or None when the key is missing,
        nothing is found, or a request fails.
    """
    LASTFM_API_KEY = os.getenv("LASTFM_API_KEY")
    if not LASTFM_API_KEY:
        console.print(
            f"[yellow]LastFM: LASTFM_API_KEY not configured; skipping LastFM search for '{artist} - {album}'[/yellow]"
        )
        return None
    try:
        from urllib.parse import quote

        params = {
            "method": "album.getinfo",
            "api_key": LASTFM_API_KEY,
            "artist": artist,
            "album": album,
            "format": "json",
        }
        qs = "&".join(f"{quote(str(k))}={quote(str(v))}" for k, v in params.items())
        # HTTPS, so the API key in the query string is not sent in clear text.
        url = f"https://ws.audioscrobbler.com/2.0/?{qs}"
        await limiter.acquire()
        timeout = aiohttp.ClientTimeout(total=10)
        async with session.get(url, timeout=timeout) as resp:
            if resp.status != 200:
                return None
            data = await resp.json()
        album_data = data.get("album") or {}
        # 'image' is a list of dicts with '#text' (URL) and 'size'.
        images = album_data.get("image") or []
        art_url = next(
            (
                img.get("#text")
                for size_name in ("mega", "extralarge", "large", "medium")
                for img in images
                if img.get("size") == size_name and img.get("#text")
            ),
            None,
        )
        if not art_url:
            return None
        await limiter.acquire()
        img_timeout = aiohttp.ClientTimeout(total=15)
        async with session.get(art_url, timeout=img_timeout) as img_resp:
            if img_resp.status == 200:
                return await img_resp.read()
    except Exception as exc:
        if COVER_DEBUG_QUERIES:
            console.print(f"[cyan][DEBUG] LastFM lookup failed: {exc}[/cyan]")
        return None
    return None


_SPOTIFY_CLIENT = None


def get_spotify_client():
    """Return the shared spotipy client, building it on first use.

    The client authenticates with the client-credentials flow using
    SPOTIFY_CLIENT_ID / SPOTIFY_CLIENT_SECRET from the environment. Returns
    None when spotipy is not installed, the credentials are missing, or the
    client cannot be constructed.
    """
    global _SPOTIFY_CLIENT
    if _SPOTIFY_CLIENT is not None:
        return _SPOTIFY_CLIENT
    if not HAVE_SPOTIPY:
        return None

    spotify_id = os.getenv("SPOTIFY_CLIENT_ID")
    spotify_secret = os.getenv("SPOTIFY_CLIENT_SECRET")
    if not (spotify_id and spotify_secret):
        return None

    try:
        from importlib import import_module

        spotipy_pkg = import_module("spotipy")
        oauth_pkg = import_module("spotipy.oauth2")
        credentials_cls = getattr(oauth_pkg, "SpotifyClientCredentials", None)
        spotify_cls = getattr(spotipy_pkg, "Spotify", None)
        if credentials_cls is None or spotify_cls is None:
            return None
        manager = credentials_cls(client_id=spotify_id, client_secret=spotify_secret)
        _SPOTIFY_CLIENT = spotify_cls(client_credentials_manager=manager)
        return _SPOTIFY_CLIENT
    except Exception:
        return None


async def search_spotify_cover(
    session: aiohttp.ClientSession,
    artist,
    album,
    limiter: "AsyncRateLimiter",
    isrc: Optional[str] = None,
):
    """Search Spotify for album cover with multiple strategies:
    - If `isrc` provided, try track search by ISRC first.
    - Try quoted album+artist queries, then looser queries.
    - Use fuzzy matching to validate results.
    - Pick the largest available image.

    A failed ISRC lookup or artwork download falls through to the album
    queries instead of aborting the search.

    Args:
        session: aiohttp session used to download the artwork.
        artist: Expected artist name.
        album: Expected album title; when empty only the artist is validated.
        limiter: Rate limiter awaited before each artwork download.
        isrc: Optional ISRC used for the first, track-based lookup.

    Returns:
        Image bytes, or None when no acceptable candidate yields an image.
    """
    client = get_spotify_client()
    if client is None:
        console.print(
            f"[yellow]Spotify: client not configured or spotipy not installed; skipping search for '{artist} - {album}'[/yellow]"
        )
        return None

    def _sp_search(q, typ="album", limit=3):
        # Blocking spotipy call, run in a worker thread; errors mean no result.
        try:
            return client.search(q=q, type=typ, limit=limit)
        except Exception:
            return None

    async def _download_largest(images):
        # Fetch the widest image of a Spotify `images` list; None when the
        # list is empty, has no URL, or the download does not succeed.
        if not images:
            return None
        best = max(images, key=lambda img: img.get("width") or 0)
        art_url = best.get("url")
        if not art_url:
            return None
        await limiter.acquire()
        try:
            async with session.get(
                art_url, timeout=aiohttp.ClientTimeout(total=15)
            ) as img_resp:
                if img_resp.status == 200:
                    return await img_resp.read()
        except Exception:
            return None
        return None

    try:
        # 1) ISRC search (track -> album)
        if isrc:
            res = await asyncio.to_thread(_sp_search, f"isrc:{isrc}", "track", 1)
            if res:
                tracks = res.get("tracks", {}).get("items", [])
                if tracks:
                    album_obj = tracks[0].get("album") or {}
                    cover = await _download_largest(album_obj.get("images") or [])
                    if cover is not None:
                        return cover

        # 2) Album searches, from the strictest query to the loosest
        norm_artist = normalize_name(artist)
        norm_album = normalize_name(album)
        queries = [
            f'album:"{album}" artist:"{artist}"',
            f"artist:{artist} album:{album}",
            f"album:{norm_album} artist:{norm_artist}",
            f'album:"{album}"',
            f'artist:"{artist}"',
        ]

        for q in queries:
            res = await asyncio.to_thread(_sp_search, q, "album", 3)
            if not res:
                continue
            albums = res.get("albums", {}).get("items", [])
            if COVER_DEBUG_QUERIES:
                try:
                    dbg = [
                        {
                            "id": a.get("id"),
                            "name": a.get("name"),
                            "artists": [
                                ar.get("name")
                                for ar in (a.get("artists") or [])[:3]
                                if ar.get("name")
                            ],
                            "images": [
                                img.get("url") for img in (a.get("images") or [])[:3]
                            ],
                        }
                        for a in (albums or [])[:3]
                    ]
                    console.print(
                        f"[cyan][DEBUG] Spotify album candidates for query '{q}': {dbg}[/cyan]"
                    )
                except Exception:
                    # Debug output must never break the lookup itself.
                    pass
            if not albums:
                continue
            # Accept the first candidate whose artist (and album, when one
            # was given) fuzzily matches and whose artwork can be downloaded.
            for a in albums:
                found_album = a.get("name") or ""
                found_artist = " ".join(
                    ar.get("name") for ar in (a.get("artists") or []) if ar.get("name")
                )
                if not is_fuzzy_match(artist, found_artist, threshold=75):
                    continue
                if album and not is_fuzzy_match(album, found_album, threshold=70):
                    continue
                cover = await _download_largest(a.get("images") or [])
                if cover is not None:
                    return cover

        return None
    except Exception:
        return None


# Fuzzy match helper for metadata
def is_fuzzy_match(expected, actual, threshold=80):
    """Tell whether two strings are similar enough, ignoring case.

    Returns False when either value is empty/None; otherwise compares the
    rapidfuzz token-set ratio of the lower-cased strings against `threshold`.
    """
    if not (expected and actual):
        return False
    score = fuzz.token_set_ratio(expected.lower(), actual.lower())
    return score >= threshold


# Fuzzy match for all fields
def is_metadata_match(
    expected_artist,
    expected_album,
    expected_title,
    found_artist,
    found_album,
    found_title,
    threshold=80,
):
    """Check artist, album and title together with fuzzy matching.

    Artist and title must both match. The album is only compared when an
    expected album is given; otherwise it counts as matching.
    """
    checks = (
        is_fuzzy_match(expected_artist, found_artist, threshold),
        is_fuzzy_match(expected_album, found_album, threshold)
        if expected_album
        else True,
        is_fuzzy_match(expected_title, found_title, threshold),
    )
    return all(checks)


# Utility to normalize artist/song names for searching
def normalize_name(name):
    """Normalize an artist/album/track name for use in search queries.

    Lower-cases the name, drops a trailing single-digit counter such as
    "(1)", removes common punctuation, then collapses runs of whitespace and
    underscores to one space. Punctuation is removed before whitespace is
    collapsed so that dropping a symbol ("a & b") cannot leave a double or
    trailing space behind.

    Returns "" for None or an empty name.
    """
    if not name:
        return ""
    name = str(name).lower().strip()
    name = re.sub(r"\s*\([0-9]\)$", "", name)  # remove (1), (2), etc. at end
    name = re.sub(r"[\(\)\[\]\{\}\'\"\!\?\.,:;`~@#$%^&*+=|\\/<>]", "", name)
    name = re.sub(r"[\s_]+", " ", name)
    return name.strip()


def artist_credit_to_name(ac):
    """Join the artist names found in a MusicBrainz artist-credit list.

    Each dict entry contributes its own 'name' or, failing that, the 'name'
    of its nested 'artist' dict. Non-dict entries and entries without a name
    are skipped. Names are joined with single spaces.
    """

    def _entry_name(entry):
        # Top-level 'name' wins; otherwise look inside a nested 'artist' dict.
        direct = entry.get("name")
        if direct:
            return direct
        nested = entry.get("artist")
        if nested and isinstance(nested, dict):
            return nested.get("name")
        return None

    names = (_entry_name(entry) for entry in ac if isinstance(entry, dict))
    return " ".join([found for found in names if found])


# Silence chatty third-party loggers (aiohttp, urllib3, etc.): only CRITICAL
# records pass, and nothing is handed on to parent loggers.
for noisy_logger in (
    "aiohttp.client",
    "aiohttp.server",
    "aiohttp.access",
    "urllib3",
    "asyncio",
    "chardet",
    "requests.packages.urllib3",
):
    _silenced = logging.getLogger(noisy_logger)
    _silenced.setLevel(logging.CRITICAL)
    _silenced.propagate = False

# The root logger is raised to CRITICAL too, for anything not our own
logging.getLogger().setLevel(logging.CRITICAL)


async def fetch_srutil_cover(
    sr, artist, song, session: aiohttp.ClientSession, limiter: AsyncRateLimiter
):
    """Look up an album through ``sr`` and download its cover image.

    ``artist`` and ``song`` are forwarded to ``sr.get_album_by_name``; the
    resulting album id is passed to ``sr.get_cover_by_album_id`` with 640 as
    the second argument. The returned URL is then fetched via ``session``
    after waiting on ``limiter``.

    Returns:
        The raw response body on HTTP 200, otherwise ``None``. Every failure
        is reported on the console and swallowed.
    """
    try:
        album = await sr.get_album_by_name(artist, song)
        if not (album and album.get("id")):
            return None

        cover_url = await sr.get_cover_by_album_id(album["id"], 640)
        if not cover_url:
            return None

        # Only the image download itself is rate limited.
        await limiter.acquire()
        try:
            async with session.get(
                cover_url, timeout=aiohttp.ClientTimeout(total=15)
            ) as resp:
                if resp.status == 200:
                    return await resp.read()
                console.print(
                    f"[red]SRUtil: Failed to fetch cover art from URL (status {resp.status}): {cover_url}[/red]"
                )
        except Exception as e:
            console.print(f"[red]SRUtil: Exception fetching cover url: {e}[/red]")
    except Exception as e:
        # This particular error gets a dedicated, more readable message.
        auth_conflict = (
            "Cannot combine AUTHORIZATION header with AUTH argument" in str(e)
        )
        console.print(
            "[red]SRUtil: Skipping due to conflicting authentication method in dependency (AUTHORIZATION header + AUTH argument).[/red]"
            if auth_conflict
            else f"[red]SRUtil: Exception: {e}[/red]"
        )
    return None


async def get_isrc(file):
    """Read the ISRC tag of ``file`` in a worker thread.

    The tag is looked up under ``"isrc"`` first and ``"ISRC"`` second. When
    the tag value is a list, its first element is returned (``None`` for an
    empty list). Returns ``None`` when neither key can be read or when
    loading the file fails (the latter is reported on the console).
    """

    def _first_isrc():
        tags = load_file(file)
        val = None
        # music_tag may expose the ISRC under either spelling
        for key in ("isrc", "ISRC"):
            try:
                val = tags[key].value
            except Exception:
                continue
            break
        if not isinstance(val, list):
            return val
        return val[0] if val else None

    try:
        return await asyncio.to_thread(_first_isrc)
    except Exception as e:
        console.print(f"[red]Error reading ISRC for {file}: {e}[/red]")
        return None


async def search_musicbrainz_by_isrc(session, isrc, limiter: AsyncRateLimiter):
    """Fetch front cover art for the recording(s) registered under ``isrc``.

    Queries the MusicBrainz ISRC endpoint (after waiting on ``limiter``) and,
    for each returned recording, tries the Cover Art Archive ``front-500``
    image of that recording's first listed release.

    Returns:
        The image bytes of the first HTTP 200 cover response, or ``None`` when
        ``isrc`` is empty, the lookup fails, or no release yields an image.
    """
    if not isrc:
        return None

    # The ISRC lookup endpoint returns the recordings carrying that code
    url = f"https://musicbrainz.org/ws/2/isrc/{isrc}?fmt=json"
    headers = {"User-Agent": "cover-art-script/1.0"}
    try:
        await limiter.acquire()
        timeout = aiohttp.ClientTimeout(total=15)
        async with session.get(url, headers=headers, timeout=timeout) as resp:
            if resp.status != 200:
                return None
            try:
                data = await resp.json()
            except Exception:
                return None

            for rec in data.get("recordings") or []:
                # only the first release tied to each recording is tried
                releases = rec.get("releases") or []
                if not releases:
                    continue
                relid = releases[0].get("id")
                if not relid:
                    continue
                caa_url = f"https://coverartarchive.org/release/{relid}/front-500"
                async with session.get(caa_url, timeout=timeout) as caa_resp:
                    if caa_resp.status != 200:
                        continue
                    console.print(f"[green]Found cover art via ISRC {isrc}[/green]")
                    return await caa_resp.read()
            return None
    except Exception as e:
        console.print(f"[red]MusicBrainz ISRC lookup exception for {isrc}: {e}[/red]")
        return None


# Concurrency limit for async processing: main() uses this value as the size
# of the asyncio.Semaphore that process_file acquires for each file.
CONCURRENCY = 18


# Helper for formatting failure reasons in a consistent way
def format_failure_reason(e, resp_status=None):
    """Turn an exception and/or HTTP status into a short failure description.

    Checked in order: asyncio timeout -> ``"timeout"``; aiohttp client error
    -> ``"network error: ..."``; truthy ``resp_status`` -> ``"HTTP <status>"``;
    any other truthy ``e`` -> ``str(e)``; otherwise ``"no match found"``.
    """
    if isinstance(e, asyncio.TimeoutError):
        return "timeout"
    if isinstance(e, aiohttp.ClientError):
        return f"network error: {str(e)}"
    if resp_status:
        return f"HTTP {resp_status}"
    return str(e) if e else "no match found"


def _record_result(file, table, results, status, source):
    """Add one file's outcome to the rich summary table and the CSV row list."""
    shown_source = source if source else "-"
    table.add_row(file, status, shown_source)
    results.append([file, status, shown_source])


async def _embed_and_announce(file, artist, album, image_bytes, source):
    """Embed ``image_bytes`` into ``file``, print the final result line and
    return the resulting status (``"Success"`` or ``"Embed Failed"``)."""
    file_basename = os.path.basename(file)
    if await embed_cover(file, image_bytes):
        console.print(
            f"[green][FINAL RESULT] {file_basename} — {artist} / {album} | Success via {source}[/green]"
        )
        return "Success"
    console.print(
        f"[red][FINAL RESULT] {file_basename} — {artist} / {album} | Embed Failed from {source}[/red]"
    )
    return "Embed Failed"


async def process_file(
    file, sr, table, results, sem, session: aiohttp.ClientSession, limiters: dict
):
    """Process a single audio file to find and embed cover art.

    Files that already carry artwork are reported as "Already Present".
    Otherwise the album-level cache is consulted and, on a miss, the providers
    are tried in a fixed order until one returns image bytes:
    MusicBrainz by ISRC (only when the file has an ISRC), SRUtil, MusicBrainz,
    Discogs, Deezer, Spotify, iTunes (album), iTunes (track), Last.fm.
    When the module-level ``ONLY_SPOTIFY`` flag is set, only Spotify is tried.

    Args:
        file: Path of the audio file.
        sr: Client object forwarded to ``fetch_srutil_cover``.
        table: Summary table; one row is added per file.
        results: List collecting ``[file, status, source]`` rows for the CSV.
        sem: Semaphore bounding how many files are processed at once.
        session: Shared aiohttp session used for all HTTP requests.
        limiters: Mapping of provider key ("srutil", "musicbrainz", "discogs",
            "deezer", "spotify", "itunes", "lastfm") to its rate limiter.

    Returns:
        None. The outcome is reported through ``table``, ``results`` and the
        console. Status is one of "Already Present", "Success",
        "Embed Failed" or "Failed".
    """
    async with sem:
        if await has_cover(file):
            _record_result(file, table, results, "Already Present", None)
            return

        artist, album, title = await get_artist_album_title(file)
        album_key = (artist, album)
        # get_artist_album_title substitutes these placeholders for missing
        # tags. Unrelated untagged files would all share one cache key, so
        # they must never read from or write to the album cache.
        cacheable = artist != "Unknown Artist" and album != "Unknown Album"
        isrc = await get_isrc(file)

        # ONLY_SPOTIFY testing mode: attempt only Spotify and return. This
        # runs before any other lookup so that an unrelated earlier hit cannot
        # be reported as "Success" when nothing was actually embedded.
        if ONLY_SPOTIFY:
            status = "Failed"
            source = None
            img = await search_spotify_cover(
                session, artist, album, limiters["spotify"], isrc
            )
            if img:
                source = "Spotify"
                if cacheable:
                    ALBUM_ART_CACHE[album_key] = img
                _log_attempt(artist, album, title, "Spotify", "Success")
                status = await _embed_and_announce(file, artist, album, img, source)
            else:
                _log_attempt(artist, album, title, "Spotify", "No match")
                console.print(
                    f"[yellow][FINAL RESULT] {os.path.basename(file)} — {artist} / {album} | No Spotify cover art found[/yellow]"
                )
            _record_result(file, table, results, status, source)
            return

        # A cache hit must carry a source label: the embed step below requires
        # both the bytes and a source, otherwise cached art is never written.
        image_bytes = ALBUM_ART_CACHE.get(album_key) if cacheable else None
        source = "Cache" if image_bytes else None

        # (label, zero-argument coroutine factory), in priority order. The
        # lambdas defer both the call and the limiters[...] lookup until the
        # provider is actually reached.
        providers = []
        if isrc:
            providers.append(
                (
                    f"MusicBrainz (ISRC:{isrc})",
                    lambda: search_musicbrainz_by_isrc(
                        session, isrc, limiters["musicbrainz"]
                    ),
                )
            )
        providers.extend(
            [
                (
                    "SRUtil",
                    lambda: fetch_srutil_cover(
                        sr, artist, album, session, limiters["srutil"]
                    ),
                ),
                (
                    "MusicBrainz",
                    lambda: search_musicbrainz_cover(
                        artist, album, session, limiters["musicbrainz"]
                    ),
                ),
                (
                    "Discogs",
                    lambda: search_discogs_cover(
                        artist, album, session, limiters["discogs"]
                    ),
                ),
                (
                    "Deezer",
                    lambda: search_deezer_cover(
                        session, artist, album, limiters["deezer"]
                    ),
                ),
                (
                    "Spotify",
                    lambda: search_spotify_cover(
                        session, artist, album, limiters["spotify"], isrc
                    ),
                ),
                (
                    "iTunes(album)",
                    lambda: search_itunes_cover(
                        session, artist, album, limiters["itunes"]
                    ),
                ),
                (
                    "iTunes(track)",
                    lambda: search_itunes_track(
                        session, artist, title, limiters["itunes"]
                    ),
                ),
                (
                    "LastFM",
                    lambda: search_lastfm_cover(
                        session, artist, album, limiters["lastfm"]
                    ),
                ),
            ]
        )

        for label, fetch in providers:
            if image_bytes:
                break
            img = await fetch()
            if img:
                image_bytes = img
                source = label
                if cacheable:
                    ALBUM_ART_CACHE[album_key] = img
                _log_attempt(artist, album, title, label, "Success")
            else:
                _log_attempt(artist, album, title, label, "No match")

        # Embed and summary
        status = "Failed"
        if image_bytes and source:
            status = await _embed_and_announce(
                file, artist, album, image_bytes, source
            )
        else:
            console.print(
                f"[yellow][FINAL RESULT] {os.path.basename(file)} — {artist} / {album} | No cover art found[/yellow]"
            )

        _record_result(file, table, results, status, source)


async def has_cover(file):
    """Return True when ``file`` already has embedded cover art.

    The tag read is blocking file I/O, so it runs in a worker thread (as
    ``get_isrc`` does) instead of stalling the event loop for the other
    concurrent workers.

    Returns:
        ``True`` if the file's ``artwork`` tag holds at least one image;
        ``False`` if it holds none or the file cannot be read at all.
    """

    def _has_artwork():
        # music_tag stores artwork in 'artwork' which may be a list-like field
        art = load_file(file)["artwork"]
        try:
            return bool(art.first)
        except Exception:
            # fallback if .first is not available
            return bool(art)

    try:
        return await asyncio.to_thread(_has_artwork)
    except Exception:
        return False


async def get_artist_album_title(file):
    """Extract ``(artist, album, title)`` from the tags of ``file``.

    The blocking tag read runs in a worker thread so the event loop stays
    responsive. A missing or empty tag is replaced by its placeholder
    ("Unknown Artist", "Unknown Album", "Unknown Title"). If the file cannot
    be read at all, all three placeholders are returned.

    Returns:
        A 3-tuple of strings ``(artist, album, title)``.
    """

    def _read_tags():
        tags = load_file(file)

        def _first_or(key, placeholder):
            # Look the tag up once and reuse the value.
            value = tags[key].first
            return str(value) if value else placeholder

        return (
            _first_or("artist", "Unknown Artist"),
            _first_or("album", "Unknown Album"),
            _first_or("title", "Unknown Title"),
        )

    try:
        return await asyncio.to_thread(_read_tags)
    except Exception:
        return "Unknown Artist", "Unknown Album", "Unknown Title"


async def embed_cover(file, image_bytes):
    """Embed ``image_bytes`` as the artwork of ``file`` using music_tag.

    Loading and saving the file is blocking disk I/O, so it runs in a worker
    thread rather than on the event loop. The error message is printed from
    the coroutine, not from the worker thread.

    Returns:
        ``True`` when the file was saved, ``False`` when loading, tagging or
        saving raised (the error is printed to the console).
    """

    def _write_artwork():
        tags = load_file(file)
        tags["artwork"] = image_bytes
        tags.save()

    try:
        await asyncio.to_thread(_write_artwork)
        return True
    except Exception as e:
        console.print(f"[red][ERROR] Failed to embed cover: {e}[/red]")
        return False


async def main():
    """Scan ``MUSIC_DIR``, embed missing cover art and write the report.

    Walks ``MUSIC_DIR`` for files whose extension is in ``AUDIO_EXTS``, runs
    ``process_file`` on each of them (bounded by a semaphore of size
    ``CONCURRENCY``) while showing a progress bar, then prints the summary
    table and writes it to ``REPORT_CSV``.

    A failure while processing one file is recorded as an "Error" row and
    does not abort the remaining files. Any other unhandled exception prints
    a traceback and exits the process with status 1.
    """
    try:
        console.print(f"[bold blue]Scanning directory: {MUSIC_DIR}[/bold blue]")
        sr = SRUtil()
        results = []
        files = []
        for root, _, filenames in os.walk(MUSIC_DIR):
            for fn in filenames:
                if os.path.splitext(fn)[1].lower() in AUDIO_EXTS:
                    files.append(os.path.join(root, fn))

        table = Table(title="Cover Art Embedding Report")
        table.add_column("File", style="cyan", overflow="fold")
        table.add_column("Status", style="green")
        table.add_column("Source", style="magenta")

        # Rate limiter argument per provider (seconds between requests)
        request_intervals = {
            "srutil": 0.1,
            "musicbrainz": 1.0,
            "itunes": 0.5,
            "discogs": 1.0,
            "deezer": 0.5,
            "lastfm": 1.0,
            "spotify": 0.5,
        }
        limiters = {
            provider: AsyncRateLimiter(interval)
            for provider, interval in request_intervals.items()
        }

        sem = asyncio.Semaphore(CONCURRENCY)

        async with aiohttp.ClientSession() as session:
            with Progress(
                TextColumn("[progress.description]{task.description}"),
                BarColumn(),
                TaskProgressColumn(),
                TimeElapsedColumn(),
            ) as progress:
                task_id = progress.add_task("Processing files...", total=len(files))

                async def worker(file):
                    # One bad file must not cancel the batch or lose the
                    # report, so the failure is recorded and the run goes on.
                    try:
                        await process_file(
                            file, sr, table, results, sem, session, limiters
                        )
                    except Exception as e:
                        console.print(
                            f"[red][ERROR] Unhandled exception processing {file}: {e}[/red]"
                        )
                        table.add_row(file, "Error", "-")
                        results.append([file, "Error", "-"])
                    finally:
                        # Advance even on failure so the bar reaches 100%.
                        progress.update(task_id, advance=1)

                # Schedule all workers; the semaphore bounds real concurrency
                await asyncio.gather(*(worker(file) for file in files))

        # Print summary table and CSV after progress bar
        console.print(table)
        # Explicit UTF-8 so non-ASCII file paths never depend on the locale.
        with open(REPORT_CSV, "w", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            writer.writerow(["File", "Status", "Source"])
            writer.writerows(results)
        console.print(f"[bold green]CSV report written to {REPORT_CSV}[/bold green]")
    except Exception as e:
        console.print(f"[red][ERROR] Unhandled exception: {e}[/red]")
        traceback.print_exc()
        sys.exit(1)


# Run the async entry point only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    asyncio.run(main())