misc

2025-09-18 08:13:21 -04:00
parent 3b74333b96
commit e1194475b3
5 changed files with 661 additions and 188 deletions
--- a/test/add_cover_art.py
+++ b/test/add_cover_art.py
@@ -0,0 +1,341 @@
+import asyncio
+import csv
+import logging
+import os
+import random
+import re
+import sys
+import threading
+import time
+import traceback
+
+import requests
+from music_tag import load_file
+from rapidfuzz import fuzz
+from rich.console import Console
+from rich.progress import Progress, BarColumn, TextColumn, TimeElapsedColumn, TaskProgressColumn
+from rich.table import Table
+
+sys.path.insert(0, "..")
+from utils.sr_wrapper import SRUtil
+
+
+# Drops one recognised trailing "(tag)" such as "(Deluxe)" or "(1999)" from an album name
+def strip_album_tags(album):
+    """Return *album* without a recognised parenthetical tag at its very end.
+
+    Only a parenthetical whose whole content is one of the known tags is
+    removed; anything else (e.g. "(Deluxe Edition)") is left untouched.
+    Matching is case-insensitive and the result is whitespace-stripped.
+    """
+    trailing_tag = re.compile(
+        r"\s*\((deluxe|remaster(ed)?|original mix|expanded|bonus|edition|version|mono|stereo|explicit|clean|anniversary|special|reissue|expanded edition|bonus track(s)?|international|digital|single|ep|live|instrumental|karaoke|radio edit|explicit version|clean version|acoustic|demo|re-recorded|remix|mix|edit|feat\.?|featuring|with .+|from .+|soundtrack|ost|score|session|vol(ume)? ?\d+|disc ?\d+|cd ?\d+|lp ?\d+|vinyl|202[0-9]|20[0-1][0-9]|19[0-9][0-9])\)$",
+        re.IGNORECASE,
+    )
+    without_tag = trailing_tag.sub("", album)
+    return without_tag.strip()
+
+# Helper to strip common trailing tags like EP, LP, Single, Album, etc. from album names
+def strip_album_suffix(album):
+    """Remove one trailing tag word (EP, LP, Single, a year, ...) from *album*.
+
+    The tag must stand on its own at the end of the name: it may be preceded
+    by whitespace, "-", "_" or ":" (which are removed with it), but it must
+    not be the tail of a longer word. Matching is case-insensitive and the
+    result is whitespace-stripped.
+
+    Returns:
+        The shortened name, or the input (stripped) when no tag is present.
+    """
+    # The negative lookbehind rejects a tag glued to a letter/digit, so titles
+    # such as "Deep" or "Help" are not mangled into "De"/"He" while
+    # "Title EP", "Title_EP" and "Title - Single" are still recognised.
+    suffix_pattern = r"[\s\-_:]*(?<![^\W_])(ep|lp|single|album|remix|version|edit|mix|deluxe|expanded|anniversary|reissue|instrumental|karaoke|ost|score|session|mono|stereo|explicit|clean|bonus|disc ?\d+|cd ?\d+|vinyl|202[0-9]|20[0-1][0-9]|19[0-9][0-9])$"
+    return re.sub(suffix_pattern, "", album, flags=re.IGNORECASE).strip()
+# iTunes/Apple Music API fallback
+def search_itunes_cover(artist, album):
+    """Search the public iTunes Search API for album art.
+
+    Args:
+        artist: Artist name used in the search term.
+        album: Album name used in the search term.
+
+    Returns:
+        The raw image bytes of the first matching album's artwork, or None
+        when nothing was found or any step of the lookup failed. This is a
+        best-effort fallback, so failures are reported on the console rather
+        than raised.
+    """
+    params = {
+        "term": f"{artist} {album}",
+        "entity": "album",
+        "limit": 1,
+        "media": "music",
+    }
+    try:
+        # Let requests URL-encode the query string
+        resp = requests.get("https://itunes.apple.com/search", params=params, timeout=10)
+        if resp.status_code != 200:
+            return None
+        results = resp.json().get("results") or []
+        if not results:
+            return None
+        art_url = results[0].get("artworkUrl100")
+        if not art_url:
+            return None
+        # The API only advertises a 100x100 thumbnail; request a larger rendition of the same image
+        art_url = art_url.replace("100x100bb", "600x600bb")
+        # A timeout here keeps a stalled download from hanging the whole run
+        img_resp = requests.get(art_url, timeout=10)
+        if img_resp.status_code == 200:
+            return img_resp.content
+    except Exception as e:
+        # Best-effort boundary: report the failure instead of silently dropping it
+        console.print(f"[red]iTunes lookup failed for {artist} - {album}: {e}[/red]")
+    return None
+
+
+# Fuzzy comparison of a single metadata field
+def is_fuzzy_match(expected, actual, threshold=80):
+    """Return True when both values are non-empty and fuzzily equal.
+
+    The comparison is case-insensitive and uses rapidfuzz's token_set_ratio;
+    a score of at least *threshold* counts as a match. An empty or missing
+    value on either side never matches.
+    """
+    if expected and actual:
+        score = fuzz.token_set_ratio(expected.lower(), actual.lower())
+        return score >= threshold
+    return False
+
+# Fuzzy comparison across artist, album and title
+def is_metadata_match(expected_artist, expected_album, expected_title, found_artist, found_album, found_title, threshold=80):
+    """Return True when artist and title match and, if an album is expected, the album too.
+
+    Each field is compared with is_fuzzy_match using the same *threshold*.
+    An empty *expected_album* means the album is not checked.
+    """
+    pairs = [
+        (expected_artist, found_artist),
+        (expected_title, found_title),
+    ]
+    if expected_album:
+        pairs.append((expected_album, found_album))
+    return all(is_fuzzy_match(want, got, threshold) for want, got in pairs)
+
+# Utility to normalize artist/song names for searching
+def normalize_name(name):
+    """Return a lowercase, punctuation-free, single-spaced form of *name*.
+
+    Steps: lowercase and trim, drop a trailing single-digit counter such as
+    "(1)", remove common punctuation, then turn every run of whitespace or
+    underscores into one space. An empty or missing name yields "".
+    """
+    if not name:
+        return ""
+    name = name.lower().strip()
+    # Remove "(1)", "(2)", ... at the end together with the space in front of it
+    name = re.sub(r"\s*\([0-9]\)$", "", name)
+    name = re.sub(r"[\(\)\[\]\{\}\'\"\!\?\.,:;`~@#$%^&*+=|\\/<>]", "", name)
+    # Collapse whitespace last so gaps left behind by removed punctuation
+    # (e.g. "a & b") do not survive as double spaces
+    name = re.sub(r"[\s_]+", " ", name).strip()
+    return name
+
+# Silence chatty library loggers (aiohttp, urllib3, etc.) so only console output is shown
+for noisy_logger in (
+    "aiohttp.client",
+    "aiohttp.server",
+    "aiohttp.access",
+    "urllib3",
+    "asyncio",
+    "chardet",
+    "requests.packages.urllib3",
+):
+    quiet = logging.getLogger(noisy_logger)
+    quiet.setLevel(logging.CRITICAL)
+    # Stop records from bubbling up to the root handlers
+    quiet.propagate = False
+
+# The root logger is raised to CRITICAL as well, covering every logger not listed above
+logging.getLogger().setLevel(logging.CRITICAL)
+
+
+# Root directory that is walked recursively for audio files
+MUSIC_DIR = "/storage/music2/completed/FLAC/review"
+# CSV report written to the current working directory when the run finishes
+REPORT_CSV = "cover_art_report.csv"
+# File extensions (lowercase, with leading dot) treated as audio
+AUDIO_EXTS = {
+    ".flac",
+    ".mp3",
+    ".m4a",
+}
+
+# Shared rich console used for all status output
+console = Console()
+
+# MusicBrainz API helpers
+
+# Limit concurrent MusicBrainz requests.
+# This is a threading semaphore because search_musicbrainz_cover() is a plain
+# blocking function: an asyncio.Semaphore cannot be acquired from sync code
+# (acquire() only returns a coroutine), so it never actually limited anything.
+MUSICBRAINZ_SEMAPHORE = threading.Semaphore(1)
+
+def search_musicbrainz_cover(artist, album, max_retries=4):
+    """Look up front cover art via MusicBrainz and the Cover Art Archive.
+
+    The first release-group returned by the MusicBrainz search for
+    ``artist:{artist} AND release:{album}`` is used, and its 500px front
+    image is downloaded from coverartarchive.org.
+
+    Args:
+        artist: Artist name as it should appear in the search query.
+        album: Album name as it should appear in the search query.
+        max_retries: How many times the search is attempted when MusicBrainz
+            answers 503; the wait between attempts doubles each time.
+
+    Returns:
+        The image bytes, or None when nothing was found or a request failed.
+        Failures are reported on the console, never raised.
+
+    Note:
+        This function blocks (network I/O and time.sleep); when called
+        directly from a coroutine it stalls the event loop for its duration.
+    """
+    search_url = "https://musicbrainz.org/ws/2/release-group/"
+    # Passing the query via params lets requests URL-encode spaces and special characters
+    params = {"query": f"artist:{artist} AND release:{album}", "fmt": "json"}
+    headers = {"User-Agent": "cover-art-script/1.0"}
+    delay = 1.5
+    for attempt in range(1, max_retries + 1):
+        try:
+            # Only the MusicBrainz request itself is serialized; the context
+            # manager guarantees exactly one release per acquire
+            with MUSICBRAINZ_SEMAPHORE:
+                resp = requests.get(search_url, params=params, headers=headers, timeout=15)
+        except requests.RequestException as e:
+            console.print(f"[red]MusicBrainz request failed for {artist} - {album}: {e}[/red]")
+            return None
+        if resp.status_code == 503:
+            console.print(f"[yellow]MusicBrainz 503 error, retrying (attempt {attempt})...[/yellow]")
+            # Exponential backoff with a little jitter
+            time.sleep(delay + random.uniform(0, 0.5))
+            delay *= 2
+            continue
+        if resp.status_code != 200:
+            console.print(f"[red]MusicBrainz API error: {resp.status_code}[/red]")
+            return None
+        try:
+            data = resp.json()
+        except ValueError as e:
+            console.print(f"[red]MusicBrainz API returned invalid JSON for {artist} - {album}: {e}[/red]")
+            return None
+        release_groups = data.get("release-groups")
+        if not release_groups:
+            console.print(f"[red]No release-groups found for {artist} - {album}[/red]")
+            return None
+        rgid = release_groups[0]["id"]
+        caa_url = f"https://coverartarchive.org/release-group/{rgid}/front-500"
+        try:
+            caa_resp = requests.get(caa_url, timeout=15)
+        except requests.RequestException as e:
+            console.print(f"[red]Cover Art Archive request failed for {artist} - {album}: {e}[/red]")
+            return None
+        if caa_resp.status_code == 200:
+            console.print(f"[green]Found cover art on Cover Art Archive for {artist} - {album}[/green]")
+            return caa_resp.content
+        console.print(f"[red]No cover art found on Cover Art Archive for {artist} - {album}[/red]")
+        return None
+    console.print(f"[red]MusicBrainz API failed after {max_retries} attempts for {artist} - {album}[/red]")
+    return None
+
+async def fetch_srutil_cover(sr, artist, song):
+    """Fetch cover art bytes for an album through SRUtil.
+
+    Args:
+        sr: Object providing the coroutines ``get_album_by_name(artist, name)``
+            and ``get_cover_by_album_id(album_id, size)``.
+        artist: Artist name passed to the album search.
+        song: Name passed as the second argument of the album search.
+
+    Returns:
+        The downloaded image bytes, or None when no album/cover was found or
+        any step failed. Errors are reported on the console, never raised.
+    """
+    try:
+        album = await sr.get_album_by_name(artist, song)
+        if not album or not album.get('id'):
+            return None
+        cover_url = await sr.get_cover_by_album_id(album['id'], 640)
+        if not cover_url:
+            return None
+        # requests is blocking: run the download in the default executor so the
+        # event loop keeps serving other files, and bound it with a timeout
+        loop = asyncio.get_running_loop()
+        resp = await loop.run_in_executor(None, lambda: requests.get(cover_url, timeout=15))
+        if resp.status_code == 200:
+            return resp.content
+        console.print(f"[red]SRUtil: Failed to fetch cover art from URL (status {resp.status_code}): {cover_url}[/red]")
+    except Exception as e:
+        msg = str(e)
+        if "Cannot combine AUTHORIZATION header with AUTH argument" in msg:
+            console.print("[red]SRUtil: Skipping due to conflicting authentication method in dependency (AUTHORIZATION header + AUTH argument).[/red]")
+        else:
+            console.print(f"[red]SRUtil: Exception: {e}[/red]")
+    return None
+
+def has_cover(file):
+    """Return True when *file* already carries embedded artwork.
+
+    Any error while loading the file or reading its artwork tag is printed
+    to the console and treated as "no cover".
+    """
+    try:
+        artwork = load_file(file)['artwork']
+        return bool(artwork.first)
+    except Exception as err:
+        console.print(f"[red]Error checking cover art for {file}: {err}[/red]")
+        return False
+
+def embed_cover(file, image_bytes):
+    """Write *image_bytes* into the artwork tag of *file* and save it.
+
+    Returns True on success; on any error the problem is printed to the
+    console and False is returned.
+    """
+    try:
+        tagged = load_file(file)
+        tagged['artwork'] = image_bytes
+        tagged.save()
+    except Exception as err:
+        console.print(f"[red]Failed to embed cover art into {file}: {err}[/red]")
+        return False
+    return True
+
+def get_artist_album_title(file):
+    """Read (artist, album, title) from the tags of *file*.
+
+    Missing artist/album become "". A missing title, or any error while
+    reading the tags, falls back to the file name without its extension.
+    """
+    def stem():
+        # File name without directory and extension, used as the title fallback
+        return os.path.splitext(os.path.basename(file))[0]
+
+    try:
+        tags = load_file(file)
+        artist = tags['artist'].value or ""
+        album = tags['album'].value or ""
+        title = tags['title'].value or stem()
+    except Exception as err:
+        console.print(f"[red]Error reading tags for {file}: {err}[/red]")
+        return "", "", stem()
+    return artist, album, title
+
+
+# Concurrency limit for async processing
+CONCURRENCY = 12
+
+
+async def _lookup_cover(sr, artist, album, label, cache, sources):
+    """Try the cache, then SRUtil, MusicBrainz and iTunes for one album spelling.
+
+    Returns ``(image_bytes, source)``; ``(None, None)`` when nothing was found.
+    A hit is stored in *cache* / *sources* under ``(artist, album)``, and
+    *label* (e.g. " (stripped)") is appended to the provider name.
+    """
+    key = (artist, album)
+    cached = cache.get(key)
+    if cached:
+        # Report the provider that originally produced the cached image
+        return cached, sources.get(key, "Cache")
+    source = "SRUtil"
+    image_bytes = await fetch_srutil_cover(sr, artist, album)
+    if not image_bytes:
+        source = "MusicBrainz"
+        image_bytes = search_musicbrainz_cover(normalize_name(artist), normalize_name(album))
+    if not image_bytes:
+        source = "iTunes"
+        image_bytes = search_itunes_cover(artist, album)
+    if not image_bytes:
+        return None, None
+    source += label
+    cache[key] = image_bytes
+    sources[key] = source
+    return image_bytes, source
+
+
+async def process_file(file, sr, table, results, sem):
+    """Embed cover art into *file* if it has none and record the outcome.
+
+    The album is looked up as tagged first, then with a trailing
+    parenthetical tag removed, then with a trailing suffix word removed; for
+    each spelling the providers are tried in the order SRUtil, MusicBrainz,
+    iTunes. Found images are cached per (artist, album) on the function
+    object (``process_file.album_art_cache``), with the provider name kept in
+    ``process_file.album_art_sources`` so the report names the real source
+    even on cache hits.
+
+    Args:
+        file: Path of the audio file.
+        sr: SRUtil-like client handed to fetch_srutil_cover.
+        table: rich Table that receives one (file, status, source) row.
+        results: List that receives the same row as a list, for the CSV.
+        sem: asyncio.Semaphore bounding how many files are processed at once.
+    """
+    async with sem:
+        if has_cover(file):
+            table.add_row(file, "Already Present", "-")
+            results.append([file, "Already Present", "-"])
+            return
+        artist, album, _title = get_artist_album_title(file)
+        # The caches live on the function object so they are shared by all calls
+        if not hasattr(process_file, "album_art_cache"):
+            process_file.album_art_cache = {}
+        if not hasattr(process_file, "album_art_sources"):
+            process_file.album_art_sources = {}
+        cache = process_file.album_art_cache
+        sources = process_file.album_art_sources
+
+        # Album spellings to try, in order, with the label added to the source name
+        candidates = [(album, "")]
+        if re.search(r"\([^)]*\)$", album):
+            cleaned_album = strip_album_tags(album)
+            if cleaned_album and cleaned_album != album:
+                candidates.append((cleaned_album, " (stripped)"))
+        suffix_stripped_album = strip_album_suffix(album)
+        if suffix_stripped_album and suffix_stripped_album != album:
+            candidates.append((suffix_stripped_album, " (suffix-stripped)"))
+
+        image_bytes, source = None, None
+        for candidate, label in candidates:
+            image_bytes, source = await _lookup_cover(sr, artist, candidate, label, cache, sources)
+            if image_bytes:
+                # Also remember the hit under the tagged album name so the
+                # remaining tracks of this album skip the failed lookups
+                cache[(artist, album)] = image_bytes
+                sources[(artist, album)] = source
+                break
+
+        if isinstance(image_bytes, bytes):
+            ok = embed_cover(file, image_bytes)
+            status = "Embedded" if ok else "Failed to Embed"
+            if ok:
+                console.print(f"[green]Embedded cover art from {source}:[/green] {file}")
+            else:
+                console.print(f"[red]Failed to embed cover art ({source}):[/red] {file}")
+        elif image_bytes:
+            status = "Failed to Embed (not bytes)"
+            console.print(f"[red]Failed to embed cover art (not bytes) ({source}):[/red] {file}")
+        else:
+            status = "Not Found"
+            source = "-"
+            console.print(f"[red]No cover art found:[/red] {file}")
+        table.add_row(file, status, source)
+        results.append([file, status, source])
+
+async def main():
+    """Scan MUSIC_DIR, embed missing cover art and report the outcome.
+
+    Every file under MUSIC_DIR whose extension is in AUDIO_EXTS is handed to
+    process_file, with at most CONCURRENCY files in flight. Afterwards the
+    result table is printed and the same rows are written to REPORT_CSV.
+    """
+    console.print(f"[bold blue]Scanning directory: {MUSIC_DIR}[/bold blue]")
+    sr = SRUtil()
+    results = []
+    files = [
+        os.path.join(root, fn)
+        for root, _, filenames in os.walk(MUSIC_DIR)
+        for fn in filenames
+        if os.path.splitext(fn)[1].lower() in AUDIO_EXTS
+    ]
+
+    table = Table(title="Cover Art Embedding Report")
+    table.add_column("File", style="cyan", overflow="fold")
+    table.add_column("Status", style="green")
+    table.add_column("Source", style="magenta")
+
+    sem = asyncio.Semaphore(CONCURRENCY)
+
+    async def worker(file, progress, task_id):
+        try:
+            await process_file(file, sr, table, results, sem)
+        except Exception as e:
+            # One failing file must not abort the run and lose the whole report
+            console.print(f"[red]Unexpected error while processing {file}: {e}[/red]")
+            table.add_row(file, "Error", "-")
+            results.append([file, "Error", "-"])
+        finally:
+            progress.update(task_id, advance=1)
+
+    with Progress(
+        TextColumn("[progress.description]{task.description}"),
+        BarColumn(),
+        TaskProgressColumn(),
+        TimeElapsedColumn(),
+    ) as progress:
+        task_id = progress.add_task("Processing files...", total=len(files))
+        # Schedule all workers
+        await asyncio.gather(*(worker(file, progress, task_id) for file in files))
+
+    # Print summary table and CSV after progress bar
+    console.print(table)
+    # Explicit UTF-8 so non-ASCII file paths do not depend on the locale encoding
+    with open(REPORT_CSV, "w", newline="", encoding="utf-8") as f:
+        writer = csv.writer(f)
+        writer.writerow(["File", "Status", "Source"])
+        writer.writerows(results)
+    console.print(f"[bold green]CSV report written to {REPORT_CSV}[/bold green]")
+
+if __name__ == "__main__":
+    asyncio.run(main())
--- a/test/test_search_track.py
+++ b/test/test_search_track.py
@@ -0,0 +1,23 @@
+import asyncio
+import logging
+import sys
+sys.path.insert(0, "..")
+from utils.sr_wrapper import SRUtil
+
+# logging.getLogger("sr_wrapper").propagate = False
+logger = logging.getLogger()
+logger.setLevel(logging.CRITICAL)
+
+async def main():
+    """Look up one known album through SRUtil and log the album and its cover.
+
+    Results are logged at CRITICAL level because the root logger is set to
+    CRITICAL above. When the search returns nothing usable, the cover lookup
+    is skipped instead of failing on a missing album.
+    """
+    sr = SRUtil()
+    artist, album_name = "Kadavar - The Sacrament Of Sin".split(" - ")
+    album = await sr.get_album_by_name(artist[:8], album_name)
+    logging.critical("Search result: %s", album)
+    if not album or not album.get('id'):
+        logging.critical("No album found for %s - %s; skipping cover lookup", artist, album_name)
+        return
+    _cover = await sr.get_cover_by_album_id(album.get('id'), 640)
+    # cover = sr._get_tidal_cover_url(album.get('cover'), 640)
+    logging.critical("Result: %s, Cover: %s", album, _cover)
+    return
+
+
+# Guarded so importing this module (e.g. during pytest collection) does not hit the network
+if __name__ == "__main__":
+    asyncio.run(main())