2025-09-18 08:13:21 -04:00
parent 3b74333b96
commit e1194475b3
5 changed files with 661 additions and 188 deletions

.gitignore (1 changed line)

@@ -30,3 +30,4 @@ job_review.py
 check_missing.py
 **/auth/*
 .gitignore
+.env

test/add_cover_art.py (new file, 341 lines)

@@ -0,0 +1,341 @@
import os
import csv
import re
import time
import sys
import random
import asyncio
import logging
import traceback
import threading
import requests
from music_tag import load_file
from rich.console import Console
from rich.table import Table
from rich.progress import Progress, BarColumn, TextColumn, TimeElapsedColumn, TaskProgressColumn
sys.path.insert(0, "..")
from utils.sr_wrapper import SRUtil
from rapidfuzz import fuzz
# Helper to strip common parenthetical tags from album names
def strip_album_tags(album):
"""Remove common parenthetical tags from the end of album names."""
pattern = r"\s*\((deluxe|remaster(ed)?|original mix|expanded|bonus|edition|version|mono|stereo|explicit|clean|anniversary|special|reissue|expanded edition|bonus track(s)?|international|digital|single|ep|live|instrumental|karaoke|radio edit|explicit version|clean version|acoustic|demo|re-recorded|remix|mix|edit|feat\.?|featuring|with .+|from .+|soundtrack|ost|score|session|vol(ume)? ?\d+|disc ?\d+|cd ?\d+|lp ?\d+|vinyl|202[0-9]|20[0-1][0-9]|19[0-9][0-9])\)$"
return re.sub(pattern, "", album, flags=re.IGNORECASE).strip()
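# Example (hypothetical input): strip_album_tags("Abbey Road (Remastered)") -> "Abbey Road"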
# Helper to strip common trailing tags like EP, LP, Single, Album, etc. from album names
def strip_album_suffix(album):
    # Remove trailing tags like ' EP', ' LP', ' Single', ' Album', ' Remix', ' Version', etc.
    # Only when they appear at the end (case-insensitive) and are preceded by a space, dash,
    # underscore, or colon, so words that merely end in "ep"/"lp" are left untouched.
    suffix_pattern = r"[\s\-_:]+(ep|lp|single|album|remix|version|edit|mix|deluxe|expanded|anniversary|reissue|instrumental|karaoke|ost|score|session|mono|stereo|explicit|clean|bonus|disc ?\d+|cd ?\d+|vinyl|202[0-9]|20[0-1][0-9]|19[0-9][0-9])$"
return re.sub(suffix_pattern, "", album, flags=re.IGNORECASE).strip()
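# Example (hypothetical input): strip_album_suffix("Currents EP") -> "Currents"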
# iTunes/Apple Music API fallback
def search_itunes_cover(artist, album):
"""Search iTunes/Apple Music public API for album art."""
import urllib.parse
base_url = "https://itunes.apple.com/search"
params = {
"term": f"{artist} {album}",
"entity": "album",
"limit": 1,
"media": "music"
}
url = f"{base_url}?{urllib.parse.urlencode(params)}"
try:
resp = requests.get(url, timeout=10)
if resp.status_code != 200:
return None
data = resp.json()
if data.get("resultCount", 0) == 0:
return None
result = data["results"][0]
# Use the highest-res artwork available
art_url = result.get("artworkUrl100")
if art_url:
art_url = art_url.replace("100x100bb", "600x600bb")
img_resp = requests.get(art_url)
if img_resp.status_code == 200:
return img_resp.content
    except Exception as e:
        console.print(f"[red]iTunes lookup failed for {artist} - {album}: {e}[/red]")
return None
# Fuzzy match helper for metadata
def is_fuzzy_match(expected, actual, threshold=80):
if not expected or not actual:
return False
return fuzz.token_set_ratio(expected.lower(), actual.lower()) >= threshold
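# Example (hypothetical inputs): token_set_ratio scores token subsets as near-perfect matches,
# so is_fuzzy_match("Pink Floyd", "Pink Floyd (2011 Remaster)") is True at the default threshold.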
# Fuzzy match for all fields
def is_metadata_match(expected_artist, expected_album, expected_title, found_artist, found_album, found_title, threshold=80):
artist_match = is_fuzzy_match(expected_artist, found_artist, threshold)
album_match = is_fuzzy_match(expected_album, found_album, threshold) if expected_album else True
title_match = is_fuzzy_match(expected_title, found_title, threshold)
return artist_match and album_match and title_match
# Utility to normalize artist/song names for searching
def normalize_name(name):
# Lowercase, strip, remove extra spaces, and remove common punctuation
name = name.lower().strip()
name = re.sub(r"\([0-9]\)$", "", name) # remove (1), (2), etc. at end
name = re.sub(r"[\s_]+", " ", name)
name = re.sub(r"[\(\)\[\]\{\}\'\"\!\?\.,:;`~@#$%^&*+=|\\/<>]", "", name)
return name
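# Example (hypothetical input): normalize_name("Panic! At The Disco") -> "panic at the disco"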
# Suppress noisy loggers (aiohttp, urllib3, etc.)
for noisy_logger in [
"aiohttp.client",
"aiohttp.server",
"aiohttp.access",
"urllib3",
"asyncio",
"chardet",
"requests.packages.urllib3",
]:
logging.getLogger(noisy_logger).setLevel(logging.CRITICAL)
logging.getLogger(noisy_logger).propagate = False
# Also suppress root logger to CRITICAL for anything not our own
logging.getLogger().setLevel(logging.CRITICAL)
# Directory to scan
MUSIC_DIR = "/storage/music2/completed/FLAC/review"
REPORT_CSV = "cover_art_report.csv"
AUDIO_EXTS = {".flac", ".mp3", ".m4a"}
console = Console()
# MusicBrainz API helpers
# Limit concurrent MusicBrainz requests.
# search_musicbrainz_cover() is a plain synchronous function, so a blocking
# threading.Semaphore is used here rather than an asyncio.Semaphore (whose
# acquire() would have to be awaited).
MUSICBRAINZ_SEMAPHORE = threading.Semaphore(1)
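# MusicBrainz throttles anonymous clients aggressively (hence the 503 backoff below),
# so a single in-flight request is intentional.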
def search_musicbrainz_cover(artist, album, max_retries=4):
url = f"https://musicbrainz.org/ws/2/release-group/?query=artist:{artist} AND release:{album}&fmt=json"
headers = {"User-Agent": "cover-art-script/1.0"}
delay = 1.5
for attempt in range(1, max_retries + 1):
        # Limit concurrency with a blocking acquire; released in the finally block below
        MUSICBRAINZ_SEMAPHORE.acquire()
try:
resp = requests.get(url, headers=headers)
if resp.status_code == 503:
console.print(f"[yellow]MusicBrainz 503 error, retrying (attempt {attempt})...[/yellow]")
time.sleep(delay + random.uniform(0, 0.5))
delay *= 2
continue
if resp.status_code != 200:
console.print(f"[red]MusicBrainz API error: {resp.status_code}[/red]")
return None
try:
data = resp.json()
except Exception as e:
console.print(f"[red]MusicBrainz API returned invalid JSON for {artist} - {album}: {e}[/red]")
return None
if not data.get("release-groups"):
console.print(f"[red]No release-groups found for {artist} - {album}[/red]")
return None
rgid = data["release-groups"][0]["id"]
caa_url = f"https://coverartarchive.org/release-group/{rgid}/front-500"
caa_resp = requests.get(caa_url)
if caa_resp.status_code == 200:
console.print(f"[green]Found cover art on Cover Art Archive for {artist} - {album}[/green]")
return caa_resp.content
console.print(f"[red]No cover art found on Cover Art Archive for {artist} - {album}[/red]")
return None
finally:
try:
MUSICBRAINZ_SEMAPHORE.release()
except Exception:
pass
console.print(f"[red]MusicBrainz API failed after {max_retries} attempts for {artist} - {album}[/red]")
return None
async def fetch_srutil_cover(sr, artist, song):
try:
album = await sr.get_album_by_name(artist, song)
if not album or not album.get('id'):
return None
cover_url = await sr.get_cover_by_album_id(album['id'], 640)
if cover_url:
resp = requests.get(cover_url)
if resp.status_code == 200:
return resp.content
else:
console.print(f"[red]SRUtil: Failed to fetch cover art from URL (status {resp.status_code}): {cover_url}[/red]")
except Exception as e:
msg = str(e)
if "Cannot combine AUTHORIZATION header with AUTH argument" in msg:
console.print("[red]SRUtil: Skipping due to conflicting authentication method in dependency (AUTHORIZATION header + AUTH argument).[/red]")
else:
console.print(f"[red]SRUtil: Exception: {e}[/red]")
return None
def has_cover(file):
try:
f = load_file(file)
has = bool(f['artwork'].first)
return has
except Exception as e:
console.print(f"[red]Error checking cover art for {file}: {e}[/red]")
return False
def embed_cover(file, image_bytes):
try:
f = load_file(file)
f['artwork'] = image_bytes
f.save()
return True
except Exception as e:
console.print(f"[red]Failed to embed cover art into {file}: {e}[/red]")
return False
def get_artist_album_title(file):
try:
f = load_file(file)
artist = f['artist'].value or ""
album = f['album'].value or ""
title = f['title'].value or os.path.splitext(os.path.basename(file))[0]
return artist, album, title
except Exception as e:
console.print(f"[red]Error reading tags for {file}: {e}[/red]")
return "", "", os.path.splitext(os.path.basename(file))[0]
# Concurrency limit for async processing
CONCURRENCY = 12
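# Note: this limit governs the SRUtil/iTunes lookups; the MusicBrainz helper above makes
# blocking requests in the event-loop thread, so those calls are effectively serialized anyway.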
async def process_file(file, sr, table, results, sem):
async with sem:
if has_cover(file):
table.add_row(file, "Already Present", "-")
results.append([file, "Already Present", "-"])
return
artist, album, title = get_artist_album_title(file)
# Use a global or passed-in cache dict for album art
if not hasattr(process_file, "album_art_cache"):
process_file.album_art_cache = {}
album_key = (artist, album)
image_bytes = process_file.album_art_cache.get(album_key)
source = "SRUtil"
if image_bytes is None:
image_bytes = await fetch_srutil_cover(sr, artist, album)
if image_bytes:
process_file.album_art_cache[album_key] = image_bytes
if not image_bytes:
image_bytes = search_musicbrainz_cover(normalize_name(artist), normalize_name(album))
source = "MusicBrainz"
if image_bytes:
process_file.album_art_cache[album_key] = image_bytes
if not image_bytes:
image_bytes = search_itunes_cover(artist, album)
source = "iTunes"
if image_bytes:
process_file.album_art_cache[album_key] = image_bytes
# If all lookups failed, try with parenthetical tag stripped
if not image_bytes and re.search(r"\([^)]*\)$", album):
cleaned_album = strip_album_tags(album)
if cleaned_album and cleaned_album != album:
cleaned_key = (artist, cleaned_album)
image_bytes = process_file.album_art_cache.get(cleaned_key)
if image_bytes is None:
image_bytes = await fetch_srutil_cover(sr, artist, cleaned_album)
if image_bytes:
process_file.album_art_cache[cleaned_key] = image_bytes
if not image_bytes:
image_bytes = search_musicbrainz_cover(normalize_name(artist), normalize_name(cleaned_album))
source = "MusicBrainz (stripped)"
if image_bytes:
process_file.album_art_cache[cleaned_key] = image_bytes
if not image_bytes:
image_bytes = search_itunes_cover(artist, cleaned_album)
source = "iTunes (stripped)"
if image_bytes:
process_file.album_art_cache[cleaned_key] = image_bytes
# If still not found, try with common suffixes (EP, LP, etc.) stripped from album name
if not image_bytes:
suffix_stripped_album = strip_album_suffix(album)
if suffix_stripped_album and suffix_stripped_album != album:
suffix_key = (artist, suffix_stripped_album)
image_bytes = process_file.album_art_cache.get(suffix_key)
if image_bytes is None:
image_bytes = await fetch_srutil_cover(sr, artist, suffix_stripped_album)
if image_bytes:
process_file.album_art_cache[suffix_key] = image_bytes
if not image_bytes:
image_bytes = search_musicbrainz_cover(normalize_name(artist), normalize_name(suffix_stripped_album))
source = "MusicBrainz (suffix-stripped)"
if image_bytes:
process_file.album_art_cache[suffix_key] = image_bytes
if not image_bytes:
image_bytes = search_itunes_cover(artist, suffix_stripped_album)
source = "iTunes (suffix-stripped)"
if image_bytes:
process_file.album_art_cache[suffix_key] = image_bytes
if isinstance(image_bytes, bytes):
ok = embed_cover(file, image_bytes)
status = "Embedded" if ok else "Failed to Embed"
if ok:
console.print(f"[green]Embedded cover art from {source}:[/green] {file}")
else:
console.print(f"[red]Failed to embed cover art ({source}):[/red] {file}")
elif image_bytes:
status = "Failed to Embed (not bytes)"
console.print(f"[red]Failed to embed cover art (not bytes) ({source}):[/red] {file}")
else:
status = "Not Found"
source = "-"
console.print(f"[red]No cover art found:[/red] {file}")
table.add_row(file, status, source)
results.append([file, status, source])
async def main():
console.print(f"[bold blue]Scanning directory: {MUSIC_DIR}[/bold blue]")
sr = SRUtil()
results = []
files = []
for root, _, filenames in os.walk(MUSIC_DIR):
for fn in filenames:
if os.path.splitext(fn)[1].lower() in AUDIO_EXTS:
file_path = os.path.join(root, fn)
files.append(file_path)
table = Table(title="Cover Art Embedding Report")
table.add_column("File", style="cyan", overflow="fold")
table.add_column("Status", style="green")
table.add_column("Source", style="magenta")
sem = asyncio.Semaphore(CONCURRENCY)
async def worker(file, sr, table, results, sem, progress, task_id):
await process_file(file, sr, table, results, sem)
progress.update(task_id, advance=1)
with Progress(
TextColumn("[progress.description]{task.description}"),
BarColumn(),
TaskProgressColumn(),
TimeElapsedColumn(),
) as progress:
task_id = progress.add_task("Processing files...", total=len(files))
# Schedule all workers
await asyncio.gather(*(worker(file, sr, table, results, sem, progress, task_id) for file in files))
# Print summary table and CSV after progress bar
console.print(table)
with open(REPORT_CSV, "w", newline="") as f:
writer = csv.writer(f)
writer.writerow(["File", "Status", "Source"])
writer.writerows(results)
console.print(f"[bold green]CSV report written to {REPORT_CSV}[/bold green]")
if __name__ == "__main__":
asyncio.run(main())
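# Intended to be run from inside test/ (sys.path.insert above points one level up for utils),
# e.g.: python add_cover_art.py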

test/test_search_track.py (new file, 23 lines)

@@ -0,0 +1,23 @@
import asyncio
import logging
import sys
sys.path.insert(0, "..")
from utils.sr_wrapper import SRUtil
# logging.getLogger("sr_wrapper").propagate = False
logger = logging.getLogger()
logger.setLevel(logging.CRITICAL)
async def main():
sr = SRUtil()
artist, album = "Kadavar - The Sacrament Of Sin".split(" - ")
search_res = await sr.get_album_by_name(artist[:8], album)
logging.critical("Search result: %s", search_res)
    album = search_res
    if not album:
        logging.critical("No album match found; aborting")
        return
    _cover = await sr.get_cover_by_album_id(album.get('id'), 640)
# cover = sr._get_tidal_cover_url(album.get('cover'), 640)
logging.critical("Result: %s, Cover: %s", album, _cover)
return
asyncio.run(main())

(modified file: RQ bulk_download job module; file path not captured in this view)

@@ -7,20 +7,23 @@ import traceback
 import uuid
 import subprocess
 import shutil
+import re
 from pathlib import Path
+from typing import Optional
 from urllib.parse import urlparse, unquote
 import aiohttp
-from datetime import datetime
+from datetime import datetime, timezone
 from mediafile import MediaFile  # type: ignore[import]
 from rq import get_current_job
 from utils.sr_wrapper import SRUtil
-import re
+from dotenv import load_dotenv
 
 # ---------- Config ----------
 ROOT_DIR = Path("/storage/music2")
 MAX_RETRIES = 5
 THROTTLE_MIN = 1.0
 THROTTLE_MAX = 3.5
+DISCORD_WEBHOOK = os.getenv("TRIP_WEBHOOK_URI", "").strip()
 
 HEADERS = {
     "User-Agent": (
@@ -38,26 +41,71 @@ logging.basicConfig(
     format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
 )
 
+load_dotenv()
 sr = SRUtil()
 
+
+# ---------- Discord helper ----------
+async def discord_notify(webhook_url: str, title: str, description: str, target: Optional[str] = None, color: int = 0x00FF00):
+    embed = {
+        "title": title,
+        "description": description[:1900] if description else "",
+        "color": color,
+        "timestamp": datetime.now(timezone.utc).isoformat(),
+    }
+    if target:
+        embed["fields"] = [{"name": "Target", "value": str(target), "inline": True}]
+    payload = {
+        "embeds": [embed],
+    }
+    while True:  # permanent retry
+        try:
+            async with aiohttp.ClientSession() as session:
+                async with session.post(webhook_url, json=payload, timeout=aiohttp.ClientTimeout(total=10)) as resp:
+                    if resp.status >= 400:
+                        text = await resp.text()
+                        raise RuntimeError(f"Discord webhook failed ({resp.status}): {text}")
+            break
+        except Exception as e:
+            print(f"Discord send failed, retrying: {e}")
+            await asyncio.sleep(5)
+
+
+def send_log_to_discord(message: str, level: str, target: Optional[str] = None):
+    colors = {"WARNING": 0xFFA500, "ERROR": 0xFF0000, "CRITICAL": 0xFF0000}
+    color = colors.get(level.upper(), 0xFFFF00)
+
+    async def _send():
+        await discord_notify(
+            webhook_url=DISCORD_WEBHOOK,
+            title=f"{level} in bulk_download",
+            description=message,
+            target=target,
+            color=color
+        )
+
+    try:
+        asyncio.get_running_loop()
+        # already in an event loop — schedule a task
+        asyncio.create_task(_send())
+    except RuntimeError:
+        # not in an event loop — safe to run
+        asyncio.run(_send())
+
+
 # ---------- Helpers ----------
 def tag_with_mediafile(file_path: str, meta: dict):
     f = MediaFile(file_path)
 
-    # --- Helper to safely set textual/number fields ---
     def safe_set(attr, value, default=None, cast=None):
         if value is None:
             value = default
         if value is not None:
-            if cast is not None:
+            if cast:
                 setattr(f, attr, cast(value))
             else:
                 setattr(f, attr, str(value))
 
-    # --- Basic textual metadata ---
     safe_set("title", meta.get("title"), default="Unknown Title")
     safe_set("artist", meta.get("artist"), default="Unknown Artist")
     safe_set("albumartist", meta.get("album_artist"), default="Unknown Artist")
@@ -66,8 +114,6 @@ def tag_with_mediafile(file_path: str, meta: dict):
     safe_set("disc", meta.get("disc_number"), default=0, cast=int)
     safe_set("isrc", meta.get("isrc"), default="")
     safe_set("bpm", meta.get("bpm"), default=0, cast=int)
 
-    # --- Release date ---
     release_date_str = meta.get("release_date")
     release_date_obj = None
     if release_date_str:
@@ -75,34 +121,15 @@ def tag_with_mediafile(file_path: str, meta: dict):
             release_date_obj = datetime.fromisoformat(release_date_str).date()
         except ValueError:
             try:
-                # fallback if only year string
                 release_date_obj = datetime(int(release_date_str[:4]), 1, 1).date()
             except Exception:
                 pass
     if release_date_obj:
         f.date = release_date_obj
 
-    # --- Save all tags ---
     f.save()
 
-def cleanup_empty_dirs(root: Path):
-    """
-    Recursively remove any directories under root that contain no files
-    (empty or only empty subdirectories).
-    """
-    for dirpath, dirnames, filenames in os.walk(root, topdown=False):
-        p = Path(dirpath)
-        has_file = any(f.is_file() for f in p.rglob("*"))
-        if not has_file:
-            try:
-                p.rmdir()
-            except Exception:
-                pass
-
 def sanitize_filename(name: str) -> str:
-    """Make a string safe for file/dir names."""
     if not name:
         return "Unknown"
     name = name.replace("/", "-").replace("\\", "-")
@@ -113,18 +140,12 @@ def sanitize_filename(name: str) -> str:
 
 def ensure_unique_path(p: Path) -> Path:
-    """
-    Ensure the given file or directory path is unique *within its parent folder*.
-    Only appends (2), (3)... if a real conflict exists in that folder.
-    """
     parent = p.parent
     stem, suffix = p.stem, p.suffix
 
     existing = {f.name for f in parent.glob(f"*{suffix}") if f.is_file()}
     candidate = f"{stem}{suffix}"
     if candidate not in existing:
         return parent / candidate
 
     counter = 2
     while True:
         candidate = f"{stem} ({counter}){suffix}"
@@ -133,7 +154,7 @@ def ensure_unique_path(p: Path) -> Path:
         counter += 1
 
-# ---------- Job ----------
+# ---------- bulk_download ----------
 def bulk_download(track_list: list, quality: str = "FLAC"):
     """
     RQ job:
@@ -142,9 +163,11 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
     - uses SR metadata to name/organize files
     - creates ONE tarball for all tracks
     - returns [tarball_path]
+    - sends relevant messages to Discord
     """
     job = get_current_job()
     job_id = job.id if job else uuid.uuid4().hex
+    target = job.meta.get("target") if job else None
     staging_root = ROOT_DIR / job_id
 
     if job:
@@ -156,30 +179,27 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
             job.meta["status"] = "Started"
             job.save_meta()
         except Exception as e:
-            logging.warning("Failed to init job.meta: %s", e)
+            send_log_to_discord(f"Failed to init job.meta: {e}", "WARNING", target)
+
+    # Job started Discord message
+    asyncio.run(discord_notify(
+        DISCORD_WEBHOOK,
+        title=f"Job Started: {job_id}",
+        description=f"Processing `{len(track_list)}` track(s)",
+        target=target,
+        color=0x00FFFF
+    ))
 
     async def process_tracks():
         per_track_meta = []
         all_final_files = []
         all_artists = set()
 
         (ROOT_DIR / "completed").mkdir(parents=True, exist_ok=True)
 
         async with aiohttp.ClientSession(headers=HEADERS) as session:
             total = len(track_list or [])
-            logging.critical("Total tracks to process: %s", total)
-            if job:
-                job.meta["progress"] = 0
-                job.save_meta()
             for i, track_id in enumerate(track_list or []):
-                track_info = {
-                    "track_id": str(track_id),
-                    "status": "Pending",
-                    "file_path": None,
-                    "error": None,
-                    "attempts": 0,
-                }
+                track_info = {"track_id": str(track_id), "status": "Pending", "file_path": None, "error": None, "attempts": 0}
 
                 attempt = 0
                 while attempt < MAX_RETRIES:
@@ -195,7 +215,6 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
                         parsed = urlparse(url)
                         clean_path = unquote(parsed.path)
                         ext = Path(clean_path).suffix or ".mp3"
-
                         tmp_file = Path(f"/tmp/{uuid.uuid4().hex}{ext}")
 
                         async with session.get(url) as resp:
@@ -205,7 +224,6 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
                                     f.write(chunk)
 
                         md = await sr.get_metadata_by_track_id(track_id) or {}
-                        logging.info("Metadata for %s: %s", track_id, md)
                         artist_raw = md.get("artist") or "Unknown Artist"
                         album_raw = md.get("album") or "Unknown Album"
                         title_raw = md.get("title") or f"Track {track_id}"
@@ -215,14 +233,11 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
                         title = sanitize_filename(title_raw)
                         all_artists.add(artist)
 
-                        artist_dir = staging_root / artist
-                        album_dir = artist_dir / album
+                        album_dir = staging_root / artist / album
                         album_dir.mkdir(parents=True, exist_ok=True)
 
                         final_file = ensure_unique_path(album_dir / f"{title}{ext}")
 
                         tag_with_mediafile(str(tmp_file), md)
                         tmp_file.rename(final_file)
                         tmp_file = None
@@ -233,60 +248,48 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
                         if job:
                             job.meta["progress"] = int(((i + 1) / total) * 100)
+                            job.meta["tracks"] = per_track_meta + [track_info]
                             job.save_meta()
                         break
 
                     except aiohttp.ClientResponseError as e:
+                        msg = f"Track {track_id} attempt {attempt} ClientResponseError: {e}"
+                        send_log_to_discord(msg, "WARNING", target)
                         if e.status == 429:
-                            wait_time = min(60, 2**attempt)  # exponential up to 60s
-                            logging.warning(
-                                "Rate limited (429). Sleeping %s seconds", wait_time
-                            )
+                            wait_time = min(60, 2**attempt)
                             await asyncio.sleep(wait_time)
                         else:
-                            await asyncio.sleep(
-                                random.uniform(THROTTLE_MIN, THROTTLE_MAX)
-                            )
+                            await asyncio.sleep(random.uniform(THROTTLE_MIN, THROTTLE_MAX))
 
                     except Exception as e:
-                        logging.error(
-                            "Track %s attempt %s failed: %s", track_id, attempt, e
-                        )
-                        traceback.print_exc()
+                        tb = traceback.format_exc()
+                        msg = f"Track {track_id} attempt {attempt} failed: {e}\n{tb}"
+                        send_log_to_discord(msg, "ERROR", target)
                         track_info["error"] = str(e)
                         if attempt >= MAX_RETRIES:
                             track_info["status"] = "Failed"
+                            send_log_to_discord(f"Track {track_id} failed after {attempt} attempts", "ERROR", target)
                         await asyncio.sleep(random.uniform(THROTTLE_MIN, THROTTLE_MAX))
 
                     finally:
                         try:
                             if tmp_file and tmp_file.exists():
-                                tmp_file.unlink()
+                                os.remove(tmp_file)
                         except Exception:
                             pass
 
                 per_track_meta.append(track_info)
-
-                if job:
-                    try:
-                        job.meta["tracks"] = per_track_meta
-                        job.save_meta()
-                    except Exception as e:
-                        logging.warning(
-                            "Failed to update job.meta after track %s: %s", track_id, e
-                        )
-
-                await asyncio.sleep(random.uniform(THROTTLE_MIN, THROTTLE_MAX))
 
         if not all_final_files:
             if job:
-                try:
                 job.meta["tarball"] = None
                 job.meta["status"] = "Failed"
                 job.save_meta()
-                except Exception:
-                    pass
+                send_log_to_discord(f"No tracks were successfully downloaded for job `{job_id}`", "CRITICAL", target)
             return []
 
-        artist_counts: dict[str, int] = {}
+        # Tarball creation
+        artist_counts = {}
         for t in per_track_meta:
             if t["status"] == "Success" and t.get("file_path"):
                 try:
@@ -294,17 +297,10 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
                 except Exception:
                     artist = "Unknown Artist"
                 artist_counts[artist] = artist_counts.get(artist, 0) + 1
 
-        if artist_counts:
-            top_artist = sorted(artist_counts.items(), key=lambda kv: (-kv[1], kv[0]))[
-                0
-            ][0]
-        else:
-            top_artist = "Unknown Artist"
+        top_artist = sorted(artist_counts.items(), key=lambda kv: (-kv[1], kv[0]))[0][0] if artist_counts else "Unknown Artist"
 
         combined_artist = sanitize_filename(top_artist)
         staged_tarball = staging_root / f"{combined_artist}.tar.gz"
-        # Ensure uniqueness (Windows-style padding) within the parent folder
         counter = 1
         base_name = staged_tarball.stem
         while staged_tarball.exists():
@@ -315,26 +311,18 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
         final_tarball.parent.mkdir(parents=True, exist_ok=True)
 
         if job:
-            try:
             job.meta["status"] = "Compressing"
             job.save_meta()
-            except Exception:
-                pass
 
         logging.info("Creating tarball: %s", staged_tarball)
+        await discord_notify(DISCORD_WEBHOOK,
+                             title=f"Compressing: Job {job_id}",
+                             description=f"Creating tarball (`{len(track_list)}` track(s)).\nStaging path: {staged_tarball}",
+                             color=0xFFA500,
+                             target=target)
 
-        def _create_tar_sync():
         try:
             subprocess.run(
-                [
-                    "tar",
-                    "-I",
-                    "pigz -9",
-                    "-cf",
-                    str(staged_tarball),
-                    "-C",
-                    str(staging_root),
-                ]
+                ["tar", "-I", "pigz -9", "-cf", str(staged_tarball), "-C", str(staging_root)]
                 + [str(f.relative_to(staging_root)) for f in all_final_files],
                 check=True,
             )
@@ -344,7 +332,7 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
             except Exception:
                 pass
         except FileNotFoundError:
-            logging.warning("pigz not available, falling back to tarfile (slower).")
+            send_log_to_discord("pigz not available, falling back to tarfile (slower).", "WARNING", target)
             with tarfile.open(staged_tarball, "w:gz") as tar:
                 for f in all_final_files:
                     try:
@@ -357,27 +345,18 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
                     except Exception:
                         pass
 
-        await asyncio.to_thread(_create_tar_sync)
-
         if not staged_tarball.exists():
-            logging.error("Tarball was not created: %s", staged_tarball)
+            send_log_to_discord(f"Tarball was not created: `{staged_tarball}`", "CRITICAL", target)
             if job:
-                try:
                 job.meta["status"] = "compress_failed"
                 job.save_meta()
-                except Exception:
-                    pass
             return []
 
-        logging.critical("Tarball created: %s", staged_tarball)
-
         try:
             staged_tarball.rename(final_tarball)
         except Exception:
             shutil.move(str(staged_tarball), str(final_tarball))
 
-        logging.critical("Tarball finalized: %s", final_tarball)
-
         await asyncio.to_thread(shutil.rmtree, staging_root, ignore_errors=True)
 
         if job:
@@ -386,6 +365,15 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
             job.meta["status"] = "Completed"
             job.save_meta()
 
+        # Job completed Discord message
+        await discord_notify(
+            DISCORD_WEBHOOK,
+            title=f"Job Completed: {job_id}",
+            description=f"Processed `{len(track_list)}` track(s). Tarball: `{final_tarball}`",
+            target=target,
+            color=0x00FF00
+        )
+
         return [str(final_tarball)]
 
     loop = asyncio.new_event_loop()
@@ -393,9 +381,9 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
     try:
         return loop.run_until_complete(process_tracks())
     except Exception as e:
+        send_log_to_discord(f"bulk_download failed: {e}\n{traceback.format_exc()}", "CRITICAL", target)
         if job:
             job.meta["status"] = "Failed"
             job.save_meta()
-        logging.critical("Exception: %s", str(e))
     finally:
         loop.close()
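For reference, a minimal sketch of how a producer might enqueue this job with RQ; the queue name, Redis connection, and import path below are illustrative assumptions, not part of this commit:

from redis import Redis
from rq import Queue

from bulk_download_job import bulk_download  # hypothetical import path for the module above

q = Queue("default", connection=Redis())
job = q.enqueue(bulk_download, ["123456789"], quality="FLAC")
job.meta["target"] = "requesting-user"  # read back by the job for the Discord embeds
job.save_meta()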

utils/sr_wrapper.py (modified)

@@ -2,7 +2,17 @@ from typing import Optional, Any
 from uuid import uuid4
 from urllib.parse import urlparse
 import hashlib
+import traceback
 import logging
+# Suppress all logging output from this module and its children
+for name in [__name__, "utils.sr_wrapper"]:
+    logger = logging.getLogger(name)
+    logger.setLevel(logging.CRITICAL)
+    logger.propagate = False
+    for handler in logger.handlers:
+        handler.setLevel(logging.CRITICAL)
+# Also set the root logger to CRITICAL as a last resort (may affect global logging)
+logging.getLogger().setLevel(logging.CRITICAL)
 import random
 import asyncio
 import os
@@ -11,6 +21,8 @@ import time
 from streamrip.client import TidalClient  # type: ignore
 from streamrip.config import Config as StreamripConfig  # type: ignore
 from dotenv import load_dotenv
+from rapidfuzz import fuzz
 
 load_dotenv()
@@ -64,6 +76,17 @@ class SRUtil:
             self.last_request_time = time.time()
         return result
 
+    def is_fuzzy_match(self, expected, actual, threshold=80):
+        if not expected or not actual:
+            return False
+        return fuzz.token_set_ratio(expected.lower(), actual.lower()) >= threshold
+
+    def is_metadata_match(self, expected_artist, expected_album, expected_title, found_artist, found_album, found_title, threshold=80):
+        artist_match = self.is_fuzzy_match(expected_artist, found_artist, threshold)
+        album_match = self.is_fuzzy_match(expected_album, found_album, threshold) if expected_album else True
+        title_match = self.is_fuzzy_match(expected_title, found_title, threshold)
+        return artist_match and album_match and title_match
+
     def dedupe_by_key(self, key: str, entries: list[dict]) -> list[dict]:
         deduped = {}
         for entry in entries:
@@ -78,6 +101,23 @@ class SRUtil:
         m, s = divmod(seconds, 60)
         return f"{m}:{s:02}"
 
+    def _get_tidal_cover_url(self, uuid, size):
+        """Generate a tidal cover url.
+        :param uuid: VALID uuid string
+        :param size:
+        """
+        TIDAL_COVER_URL = "https://resources.tidal.com/images/{uuid}/{width}x{height}.jpg"
+        possibles = (80, 160, 320, 640, 1280)
+        assert size in possibles, f"size must be in {possibles}"
+        return TIDAL_COVER_URL.format(
+            uuid=uuid.replace("-", "/"),
+            height=size,
+            width=size,
+        )
+
     def combine_album_track_metadata(
         self, album_json: dict | None, track_json: dict
     ) -> dict:
@@ -140,32 +180,33 @@
         ]
 
     async def get_artists_by_name(self, artist_name: str) -> Optional[list]:
-        """Get artist(s) by name.
-        Args:
-            artist_name (str): The name of the artist.
-        Returns:
-            Optional[dict]: The artist details or None if not found.
-        """
-        try:
-            await self.streamrip_client.login()
-        except Exception as e:
-            logging.info("Login Exception: %s", str(e))
-            pass
+        """Get artist(s) by name. Retry login only on authentication failure. Rate limit and retry on 400/429."""
+        import asyncio
         artists_out: list[dict] = []
+        max_retries = 4
+        delay = 1.0
+        for attempt in range(max_retries):
             try:
                 artists = await self.streamrip_client.search(
                     media_type="artist", query=artist_name
                 )
+                break
             except AttributeError:
                 await self.streamrip_client.login()
-                artists = await self.streamrip_client.search(
-                    media_type="artist", query=artist_name
-                )
-        logging.critical("Artists output: %s", artists)
+                if attempt == max_retries - 1:
+                    return None
+            except Exception as e:
+                msg = str(e)
+                if ("400" in msg or "429" in msg) and attempt < max_retries - 1:
+                    await asyncio.sleep(delay)
+                    delay *= 2
+                    continue
+                else:
+                    return None
+        else:
+            return None
         artists = artists[0].get("items", [])
         if not artists:
-            logging.warning("No artist found for name: %s", artist_name)
             return None
         artists_out = [
             {
@@ -179,26 +220,33 @@
         return artists_out
 
     async def get_albums_by_artist_id(self, artist_id: int) -> Optional[list | dict]:
-        """Get albums by artist ID
-        Args:
-            artist_id (int): The ID of the artist.
-        Returns:
-            Optional[list[dict]]: List of albums or None if not found.
-        """
+        """Get albums by artist ID. Retry login only on authentication failure. Rate limit and retry on 400/429."""
+        import asyncio
         artist_id_str: str = str(artist_id)
         albums_out: list[dict] = []
+        max_retries = 4
+        delay = 1.0
+        for attempt in range(max_retries):
             try:
-                await self.streamrip_client.login()
                 metadata = await self.streamrip_client.get_metadata(
                     item_id=artist_id_str, media_type="artist"
                 )
+                break
             except AttributeError:
                 await self.streamrip_client.login()
-                metadata = await self.streamrip_client.get_metadata(
-                    item_id=artist_id_str, media_type="artist"
-                )
+                if attempt == max_retries - 1:
+                    return None
+            except Exception as e:
+                msg = str(e)
+                if ("400" in msg or "429" in msg) and attempt < max_retries - 1:
+                    await asyncio.sleep(delay)
+                    delay *= 2
+                    continue
+                else:
+                    return None
+        else:
+            return None
         if not metadata:
-            logging.warning("No metadata found for artist ID: %s", artist_id)
             return None
         albums = self.dedupe_by_key("title", metadata.get("albums", []))
         albums_out = [
@@ -211,10 +259,66 @@
             for album in albums
             if "title" in album and "id" in album and "artists" in album
         ]
-        logging.debug("Retrieved albums: %s", albums_out)
         return albums_out
 
+    async def get_album_by_name(self, artist: str, album: str) -> Optional[dict]:
+        """Get album by artist and album name using artist ID and fuzzy matching. Try first 8 chars, then 12 if no match. Notify on success."""
+        # Notification moved to add_cover_art.py as requested
+        for trunc in (8, 12):
+            search_artist = artist[:trunc]
+            artists = await self.get_artists_by_name(search_artist)
+            if not artists:
+                continue
+            best_artist = None
+            best_artist_score = 0
+            for a in artists:
+                score = fuzz.token_set_ratio(artist, a["artist"])
+                if score > best_artist_score:
+                    best_artist = a
+                    best_artist_score = int(score)
+            if not best_artist or best_artist_score < 85:
+                continue
+            artist_id = best_artist["id"]
+            albums = await self.get_albums_by_artist_id(artist_id)
+            if not albums:
+                continue
+            best_album = None
+            best_album_score = 0
+            for alb in albums:
+                score = fuzz.token_set_ratio(album, alb["album"])
+                if score > best_album_score:
+                    best_album = alb
+                    best_album_score = int(score)
+            if best_album and best_album_score >= 85:
+                return best_album
+        return None
+
+    async def get_cover_by_album_id(self, album_id: int, size: int = 640) -> Optional[str]:
+        """Get cover URL by album ID. Retry login only on authentication failure."""
+        if size not in [80, 160, 320, 640, 1280]:
+            return None
+        album_id_str: str = str(album_id)
+        for attempt in range(2):
+            try:
+                metadata = await self.streamrip_client.get_metadata(
+                    item_id=album_id_str, media_type="album"
+                )
+                break
+            except AttributeError:
+                await self.streamrip_client.login()
+                if attempt == 1:
+                    return None
+        else:
+            return None
+        if not metadata:
+            return None
+        cover_id = metadata.get("cover")
+        if not cover_id:
+            return None
+        cover_url = self._get_tidal_cover_url(cover_id, size)
+        return cover_url
+
     async def get_tracks_by_album_id(
         self, album_id: int, quality: str = "FLAC"
     ) -> Optional[list | dict]:
@@ -247,7 +351,7 @@
         ]
         return tracks_out
 
-    async def get_tracks_by_artist_song(self, artist: str, song: str) -> Optional[list]:
+    async def get_tracks_by_artist_song(self, artist: str, song: str, n: int = 0) -> Optional[list]:
         """Get track by artist and song name
         Args:
             artist (str): The name of the artist.
@@ -256,7 +360,23 @@
             Optional[dict]: The track details or None if not found.
         TODO: Reimplement using StreamRip
         """
+        if not self.streamrip_client.logged_in:
+            await self.streamrip_client.login()
+        try:
+            search_res = await self.streamrip_client.search(media_type="track",
+                                                             query=f"{artist} - {song}",
+                                                             )
+            logging.critical("Result: %s", search_res)
+            return search_res[0].get('items')
+        except Exception as e:
+            traceback.print_exc()
+            logging.critical("Search Exception: %s", str(e))
+            if n < 3:
+                n += 1
+                return await self.get_tracks_by_artist_song(artist, song, n)
+        finally:
             return []
+            # return []
 
     async def get_stream_url_by_track_id(
         self, track_id: int, quality: str = "FLAC"