Files
api/utils/rip_background.py
codey c2044711fb - Changed API key validation from if not _key in self.constants.API_KEYS to if _key not in self.constants.API_KEYS for better readability.
Enhance RadioUtil playlist handling and deduplication

- Added checks to ensure playlists are initialized and not empty.
- Improved deduplication logic to prevent modifying the original playlist during iteration.
- Added logging for duplicate removal and playlist population.

Add cover art handling in rip_background.py

- Implemented functionality to attach album art if provided in metadata.
- Added error handling for cover art download failures.

Introduce unique filename handling in rip_background.py

- Added `ensure_unique_filename_in_dir` function to prevent overwriting files with the same name.

Refactor SRUtil for improved error handling and metadata fetching

- Introduced `MetadataFetchError` for better error management during metadata retrieval.
- Implemented `_safe_api_call` for resilient API calls with retry logic.
- Enhanced `get_artists_by_name` to optionally group results by artist name.
- Updated various methods to utilize the new error handling and retry mechanisms.
2025-09-22 11:08:48 -04:00

581 lines
24 KiB
Python

import logging
import asyncio
import random
import os
import tarfile
import traceback
import uuid
import subprocess
import shutil
from pathlib import Path
from typing import Optional
from urllib.parse import urlparse, unquote
import aiohttp
from datetime import datetime, timezone
from mediafile import MediaFile, Image, ImageType # type: ignore[import]
from rq import get_current_job
from utils.sr_wrapper import SRUtil, MetadataFetchError
from dotenv import load_dotenv
import re
# ---------- Config ----------
ROOT_DIR = Path("/storage/music2")
MAX_RETRIES = 5
THROTTLE_MIN = 1.0
THROTTLE_MAX = 3.5
DISCORD_WEBHOOK = os.getenv("TRIP_WEBHOOK_URI", "").strip()
HEADERS = {
"User-Agent": (
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
"AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/116.0.5845.97 Safari/537.36"
),
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
"Accept-Language": "en-US,en;q=0.9",
"Connection": "keep-alive",
}
logging.basicConfig(
level=logging.DEBUG,
format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
)
load_dotenv()
sr = SRUtil()
# ---------- Discord helper ----------
async def discord_notify(webhook_url: str, title: str, description: str, target: Optional[str] = None, color: int = 0x00FF00):
embed = {
"title": title,
"description": description[:1900] if description else "",
"color": color,
"timestamp": datetime.now(timezone.utc).isoformat(),
}
if target:
embed["fields"] = [{"name": "Target", "value": str(target), "inline": True}]
payload = {
"embeds": [embed],
}
while True: # permanent retry
try:
async with aiohttp.ClientSession() as session:
async with session.post(webhook_url, json=payload, timeout=aiohttp.ClientTimeout(total=10)) as resp:
if resp.status >= 400:
text = await resp.text()
raise RuntimeError(f"Discord webhook failed ({resp.status}): {text}")
break
except Exception as e:
print(f"Discord send failed, retrying: {e}")
await asyncio.sleep(5)
def send_log_to_discord(message: str, level: str, target: Optional[str] = None):
colors = {"WARNING": 0xFFA500, "ERROR": 0xFF0000, "CRITICAL": 0xFF0000}
color = colors.get(level.upper(), 0xFFFF00)
async def _send():
await discord_notify(
webhook_url=DISCORD_WEBHOOK,
title=f"{level} in bulk_download",
description=message,
target=target,
color=color
)
try:
asyncio.get_running_loop()
# already in an event loop — schedule a task
asyncio.create_task(_send())
except RuntimeError:
# not in an event loop — safe to run
asyncio.run(_send())
# ---------- Helpers ----------
def tag_with_mediafile(file_path: str, meta: dict):
f = MediaFile(file_path)
def safe_set(attr, value, default=None, cast=None):
if value is None:
value = default
if value is not None:
if cast:
setattr(f, attr, cast(value))
else:
setattr(f, attr, str(value))
safe_set("title", meta.get("title"), default="Unknown Title")
safe_set("artist", meta.get("artist"), default="Unknown Artist")
safe_set("albumartist", meta.get("album_artist"), default="Unknown Artist")
safe_set("album", meta.get("album"), default="Unknown Album")
safe_set("track", meta.get("track_number"), default=0, cast=int)
safe_set("disc", meta.get("disc_number"), default=0, cast=int)
safe_set("isrc", meta.get("isrc"), default="")
safe_set("bpm", meta.get("bpm"), default=0, cast=int)
release_date_str = meta.get("release_date")
release_date_obj = None
if release_date_str:
try:
release_date_obj = datetime.fromisoformat(release_date_str).date()
except ValueError:
try:
release_date_obj = datetime(int(release_date_str[:4]), 1, 1).date()
except Exception:
pass
if release_date_obj:
f.date = release_date_obj
# Attach album art if provided in meta (synchronous fallback)
try:
cover_bytes = meta.get("cover_bytes")
cover_url = None
if not cover_bytes:
cover_url = meta.get("cover_art_url") or meta.get("cover_url")
if not cover_bytes and cover_url:
try:
import requests
resp = requests.get(cover_url, timeout=10)
resp.raise_for_status()
cover_bytes = resp.content
except Exception:
cover_bytes = None
if cover_bytes:
try:
img = Image(cover_bytes, desc=None, type=ImageType.front)
f.images = [img]
except Exception:
pass
except Exception:
pass
f.save()
def sanitize_filename(name: str) -> str:
if not name:
return "Unknown"
name = name.replace("/", "-").replace("\\", "-")
name = re.sub(r'[<>:"|?*\x00-\x1F]', "", name)
name = name.strip().strip(".")
name = re.sub(r"\s+", " ", name)
return name[:180] or "Unknown"
def ensure_unique_path(p: Path) -> Path:
parent = p.parent
stem, suffix = p.stem, p.suffix
existing = {f.name for f in parent.glob(f"*{suffix}") if f.is_file()}
candidate = f"{stem}{suffix}"
if candidate not in existing:
return parent / candidate
counter = 2
while True:
candidate = f"{stem} ({counter}){suffix}"
if candidate not in existing:
return parent / candidate
counter += 1
def ensure_unique_filename_in_dir(parent: Path, filename: str) -> Path:
"""Return a Path in `parent` with a unique filename.
Handles multi-part extensions like `.tar.gz` so names become
`Name (2).tar.gz` instead of `Name.tar (2).tar.gz`.
"""
parent.mkdir(parents=True, exist_ok=True)
# special-case .tar.gz
if filename.lower().endswith(".tar.gz"):
ext = ".tar.gz"
base = filename[:-len(ext)]
else:
p = Path(filename)
ext = p.suffix
base = p.stem
existing = {f.name for f in parent.iterdir() if f.is_file()}
candidate = f"{base}{ext}"
if candidate not in existing:
return parent / candidate
counter = 2
while True:
candidate = f"{base} ({counter}){ext}"
if candidate not in existing:
return parent / candidate
counter += 1
# ---------- bulk_download ----------
def bulk_download(track_list: list, quality: str = "FLAC"):
"""
RQ job:
- fetches stream URLs
- downloads with retries + throttling
- uses SR metadata to name/organize files
- creates ONE tarball for all tracks
- returns [tarball_path]
- sends relevant messages to Discord
"""
job = get_current_job()
job_id = job.id if job else uuid.uuid4().hex
target = job.meta.get("target") if job else None
staging_root = ROOT_DIR / job_id
if job:
try:
job.meta["track_ids"] = [str(t) for t in (track_list or [])]
job.meta["tracks"] = []
job.meta["progress"] = 0
job.meta["tarball"] = None
job.meta["status"] = "Started"
job.save_meta()
except Exception as e:
send_log_to_discord(f"Failed to init job.meta: {e}", "WARNING", target)
# Job started Discord message
asyncio.run(discord_notify(
DISCORD_WEBHOOK,
title=f"Job Started: {job_id}",
description=f"Processing `{len(track_list)}` track(s)",
target=target,
color=0x00FFFF
))
async def process_tracks():
per_track_meta = []
all_final_files = []
all_artists = set()
(ROOT_DIR / "completed").mkdir(parents=True, exist_ok=True)
async with aiohttp.ClientSession(headers=HEADERS) as session:
# Set up a one-time rate-limit callback to notify on the first 429 seen by SRUtil
async def _rate_limit_notify(exc: Exception):
try:
send_log_to_discord(f"Rate limit observed while fetching metadata: {exc}", "WARNING", target)
except Exception:
pass
# attach callback and reset notified flag for this job run
try:
sr.on_rate_limit = _rate_limit_notify
sr._rate_limit_notified = False
except Exception:
pass
total = len(track_list or [])
for i, track_id in enumerate(track_list or []):
track_info = {"track_id": str(track_id), "status": "Pending", "file_path": None, "error": None, "attempts": 0}
attempt = 0
while attempt < MAX_RETRIES:
tmp_file = None
attempt += 1
track_info["attempts"] = attempt
try:
sr.get_cover_by_album_id
url = await sr.get_stream_url_by_track_id(track_id, quality)
if not url:
raise RuntimeError("No stream URL")
parsed = urlparse(url)
clean_path = unquote(parsed.path)
ext = Path(clean_path).suffix or ".mp3"
tmp_file = Path(f"/tmp/{uuid.uuid4().hex}{ext}")
async with session.get(url) as resp:
resp.raise_for_status()
with open(tmp_file, "wb") as f:
async for chunk in resp.content.iter_chunked(64 * 1024):
f.write(chunk)
try:
md = await sr.get_metadata_by_track_id(track_id) or {}
except MetadataFetchError as me:
# Permanent metadata failure — notify and continue (mark track failed)
msg = f"Metadata permanently failed for track {track_id}: {me}"
try:
send_log_to_discord(msg, "ERROR", target)
except Exception:
pass
track_info["status"] = "Failed"
track_info["error"] = str(me)
per_track_meta.append(track_info)
if job:
job.meta["tracks"] = per_track_meta
job.meta["progress"] = int(((i + 1) / total) * 100)
job.save_meta()
break
artist_raw = md.get("artist") or "Unknown Artist"
album_raw = md.get("album") or "Unknown Album"
title_raw = md.get("title") or f"Track {track_id}"
artist = sanitize_filename(artist_raw)
album = sanitize_filename(album_raw)
title = sanitize_filename(title_raw)
all_artists.add(artist)
album_dir = staging_root / artist / album
album_dir.mkdir(parents=True, exist_ok=True)
final_file = ensure_unique_path(album_dir / f"{title}{ext}")
# Move file into final location first (tags will be updated on moved file)
tmp_file.rename(final_file)
# Try to fetch cover art via SRUtil (use album_id from metadata)
try:
album_field = md.get("album")
album_id = md.get("album_id") or (album_field.get("id") if isinstance(album_field, dict) else None)
except Exception:
album_id = None
if album_id:
try:
cover_url = await sr.get_cover_by_album_id(album_id, size=640)
except Exception:
cover_url = None
else:
cover_url = md.get("cover_url")
# Embed tags + artwork using music_tag if available, falling back to mediafile tagging
embedded = False
try:
if cover_url:
try:
timeout = aiohttp.ClientTimeout(total=15)
async with session.get(cover_url, timeout=timeout) as img_resp:
if img_resp.status == 200:
img_bytes = await img_resp.read()
else:
img_bytes = None
# Notify Discord about failed cover download (HTTP error)
try:
send_log_to_discord(
f"Cover download HTTP `{img_resp.status}` for track `{track_id} album_id={album_id} url={cover_url} artist={artist} album={album}`",
"WARNING",
target,
)
except Exception:
pass
except Exception as e:
img_bytes = None
# Notify Discord about exception during cover download
try:
send_log_to_discord(
f"Cover download exception for track `{track_id} album_id={album_id} url={cover_url} artist={artist} album={album}`: `{e}`",
"WARNING",
target,
)
except Exception:
pass
else:
img_bytes = None
# Prefer music_tag if available (keeps compatibility with add_cover_art.py)
try:
from music_tag import load_file as mt_load_file # type: ignore
try:
mf = mt_load_file(str(final_file))
# set basic tags
if md.get('title'):
mf['title'] = md.get('title')
if md.get('artist'):
mf['artist'] = md.get('artist')
if md.get('album'):
mf['album'] = md.get('album')
tracknum = md.get('track_number')
if tracknum is not None:
try:
mf['tracknumber'] = int(tracknum)
except Exception:
pass
if img_bytes:
mf['artwork'] = img_bytes
mf.save()
embedded = True
except Exception:
embedded = False
except Exception:
embedded = False
# If music_tag not available or failed, fallback to mediafile tagging
if not embedded:
# If we had a cover_url but no bytes, log a warning to Discord
try:
if cover_url and not img_bytes:
send_log_to_discord(
f"Cover art not available for track {track_id} album_id={album_id} url={cover_url}",
"WARNING",
target,
)
except Exception:
pass
tag_with_mediafile(str(final_file), md)
except Exception:
# Ensure at least the basic tags are written
try:
tag_with_mediafile(str(final_file), md)
except Exception:
pass
tmp_file = None
track_info["status"] = "Success"
track_info["file_path"] = str(final_file)
track_info["error"] = None
all_final_files.append(final_file)
if job:
job.meta["progress"] = int(((i + 1) / total) * 100)
job.meta["tracks"] = per_track_meta + [track_info]
job.save_meta()
break
except aiohttp.ClientResponseError as e:
msg = f"Track {track_id} attempt {attempt} ClientResponseError: {e}"
send_log_to_discord(msg, "WARNING", target)
if e.status == 429:
wait_time = min(60, 2**attempt)
await asyncio.sleep(wait_time)
else:
await asyncio.sleep(random.uniform(THROTTLE_MIN, THROTTLE_MAX))
except Exception as e:
tb = traceback.format_exc()
msg = f"Track {track_id} attempt {attempt} failed: {e}\n{tb}"
send_log_to_discord(msg, "ERROR", target)
track_info["error"] = str(e)
if attempt >= MAX_RETRIES:
track_info["status"] = "Failed"
send_log_to_discord(f"Track {track_id} failed after {attempt} attempts", "ERROR", target)
await asyncio.sleep(random.uniform(THROTTLE_MIN, THROTTLE_MAX))
finally:
try:
if tmp_file and tmp_file.exists():
os.remove(tmp_file)
except Exception:
pass
per_track_meta.append(track_info)
if not all_final_files:
if job:
job.meta["tarball"] = None
job.meta["status"] = "Failed"
job.save_meta()
send_log_to_discord(f"No tracks were successfully downloaded for job `{job_id}`", "CRITICAL", target)
return []
# Tarball creation
artist_counts = {}
for t in per_track_meta:
if t["status"] == "Success" and t.get("file_path"):
try:
artist = Path(t["file_path"]).relative_to(staging_root).parts[0]
except Exception:
artist = "Unknown Artist"
artist_counts[artist] = artist_counts.get(artist, 0) + 1
top_artist = sorted(artist_counts.items(), key=lambda kv: (-kv[1], kv[0]))[0][0] if artist_counts else "Unknown Artist"
# Prefer `job.meta['target']` when provided by the enqueuer. Fall back to the top artist.
target_name = None
try:
if job and job.meta:
target_name = job.meta.get("target")
except Exception:
target_name = None
base_label = sanitize_filename(target_name) if target_name else sanitize_filename(top_artist)
staged_tarball = staging_root / f"{base_label}.tar.gz"
counter = 1
base_name = staged_tarball.stem
while staged_tarball.exists():
counter += 1
staged_tarball = staging_root / f"{base_name} ({counter}).tar.gz"
final_dir = ROOT_DIR / "completed" / quality
final_dir.mkdir(parents=True, exist_ok=True)
# Ensure we don't overwrite an existing final tarball. Preserve `.tar.gz` style.
final_tarball = ensure_unique_filename_in_dir(final_dir, staged_tarball.name)
if job:
job.meta["status"] = "Compressing"
job.save_meta()
logging.info("Creating tarball: %s", staged_tarball)
await discord_notify(DISCORD_WEBHOOK,
title=f"Compressing: Job {job_id}",
description=f"Creating tarball: `{len(all_final_files)}` track(s).\nStaging path: {staged_tarball}",
color=0xFFA500,
target=target)
try:
subprocess.run(
["tar", "-I", "pigz -9", "-cf", str(staged_tarball), "-C", str(staging_root)]
+ [str(f.relative_to(staging_root)) for f in all_final_files],
check=True,
)
for f in all_final_files:
try:
os.remove(f)
except Exception:
pass
except FileNotFoundError:
send_log_to_discord("pigz not available, falling back to tarfile (slower).", "WARNING", target)
with tarfile.open(staged_tarball, "w:gz") as tar:
for f in all_final_files:
try:
arcname = f.relative_to(staging_root)
except ValueError:
arcname = f.name
tar.add(f, arcname=str(arcname))
try:
os.remove(f)
except Exception:
pass
if not staged_tarball.exists():
send_log_to_discord(f"Tarball was not created: `{staged_tarball}`", "CRITICAL", target)
if job:
job.meta["status"] = "compress_failed"
job.save_meta()
return []
try:
staged_tarball.rename(final_tarball)
except Exception:
shutil.move(str(staged_tarball), str(final_tarball))
await asyncio.to_thread(shutil.rmtree, staging_root, ignore_errors=True)
if job:
job.meta["tarball"] = str(final_tarball)
job.meta["progress"] = 100
job.meta["status"] = "Completed"
job.save_meta()
# Job completed Discord message
completed = len(all_final_files)
failed = (len(track_list) - completed)
await discord_notify(
DISCORD_WEBHOOK,
title=f"Job Completed: {job_id}",
description=f"Processed `{len(track_list)}` track(s).\nCompleted: `{completed}`\nFailed: `{failed}`\nTarball: `{final_tarball}`",
target=target,
color=0x00FF00
)
return [str(final_tarball)]
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
return loop.run_until_complete(process_tracks())
except Exception as e:
send_log_to_discord(f"bulk_download failed: {e}\n{traceback.format_exc()}", "CRITICAL", target)
if job:
job.meta["status"] = "Failed"
job.save_meta()
finally:
loop.close()