misc
.gitignore (vendored)
@@ -30,3 +30,4 @@ job_review.py
 check_missing.py
 **/auth/*
 .gitignore
+.env
test/add_cover_art.py (new file, 341 lines)
@@ -0,0 +1,341 @@
import os
import csv
import re
import time
import sys
import random
import asyncio
import logging
import traceback
import requests
from music_tag import load_file
from rich.console import Console
from rich.table import Table
from rich.progress import Progress, BarColumn, TextColumn, TimeElapsedColumn, TaskProgressColumn
sys.path.insert(0, "..")
from utils.sr_wrapper import SRUtil
from rapidfuzz import fuzz


# Helper to strip common parenthetical tags from album names
def strip_album_tags(album):
    """Remove common parenthetical tags from the end of album names."""
    pattern = r"\s*\((deluxe|remaster(ed)?|original mix|expanded|bonus|edition|version|mono|stereo|explicit|clean|anniversary|special|reissue|expanded edition|bonus track(s)?|international|digital|single|ep|live|instrumental|karaoke|radio edit|explicit version|clean version|acoustic|demo|re-recorded|remix|mix|edit|feat\.?|featuring|with .+|from .+|soundtrack|ost|score|session|vol(ume)? ?\d+|disc ?\d+|cd ?\d+|lp ?\d+|vinyl|202[0-9]|20[0-1][0-9]|19[0-9][0-9])\)$"
    return re.sub(pattern, "", album, flags=re.IGNORECASE).strip()


# Helper to strip common trailing tags like EP, LP, Single, Album, etc. from album names
def strip_album_suffix(album):
    # Remove trailing tags like ' EP', ' LP', ' Single', ' Album', ' Remix', ' Version', etc.
    # Only if they appear at the end, case-insensitive, with or without punctuation
    suffix_pattern = r"[\s\-_:]*(ep|lp|single|album|remix|version|edit|mix|deluxe|expanded|anniversary|reissue|instrumental|karaoke|ost|score|session|mono|stereo|explicit|clean|bonus|disc ?\d+|cd ?\d+|vinyl|202[0-9]|20[0-1][0-9]|19[0-9][0-9])$"
    return re.sub(suffix_pattern, "", album, flags=re.IGNORECASE).strip()
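# Illustrative behaviour of the two strippers (editor's sketch, not part of the committed file):
#   strip_album_tags("Currents (Deluxe)")  -> "Currents"
#   strip_album_suffix("Awake EP")         -> "Awake"
# Multi-word tags such as "(Deluxe Edition)" are matched by neither pattern.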

# iTunes/Apple Music API fallback
def search_itunes_cover(artist, album):
    """Search iTunes/Apple Music public API for album art."""
    import urllib.parse
    base_url = "https://itunes.apple.com/search"
    params = {
        "term": f"{artist} {album}",
        "entity": "album",
        "limit": 1,
        "media": "music"
    }
    url = f"{base_url}?{urllib.parse.urlencode(params)}"
    try:
        resp = requests.get(url, timeout=10)
        if resp.status_code != 200:
            return None
        data = resp.json()
        if data.get("resultCount", 0) == 0:
            return None
        result = data["results"][0]
        # Use the highest-res artwork available
        art_url = result.get("artworkUrl100")
        if art_url:
            art_url = art_url.replace("100x100bb", "600x600bb")
            img_resp = requests.get(art_url)
            if img_resp.status_code == 200:
                return img_resp.content
    except Exception:
        traceback.format_exc()
        pass
    return None
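# Note (editor's sketch): rewriting "100x100bb" to "600x600bb" requests a larger rendition of the
# artworkUrl100 thumbnail; the size-in-URL trick is widely used but not officially documented, and
# a missing 600x600 variant simply returns a non-200 status and falls through to None above.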

# Fuzzy match helper for metadata
def is_fuzzy_match(expected, actual, threshold=80):
    if not expected or not actual:
        return False
    return fuzz.token_set_ratio(expected.lower(), actual.lower()) >= threshold


# Fuzzy match for all fields
def is_metadata_match(expected_artist, expected_album, expected_title, found_artist, found_album, found_title, threshold=80):
    artist_match = is_fuzzy_match(expected_artist, found_artist, threshold)
    album_match = is_fuzzy_match(expected_album, found_album, threshold) if expected_album else True
    title_match = is_fuzzy_match(expected_title, found_title, threshold)
    return artist_match and album_match and title_match


# Utility to normalize artist/song names for searching
def normalize_name(name):
    # Lowercase, strip, remove extra spaces, and remove common punctuation
    name = name.lower().strip()
    name = re.sub(r"\([0-9]\)$", "", name)  # remove (1), (2), etc. at end
    name = re.sub(r"[\s_]+", " ", name)
    name = re.sub(r"[\(\)\[\]\{\}\'\"\!\?\.,:;`~@#$%^&*+=|\\/<>]", "", name)
    return name


# Suppress noisy loggers (aiohttp, urllib3, etc.)
for noisy_logger in [
    "aiohttp.client",
    "aiohttp.server",
    "aiohttp.access",
    "urllib3",
    "asyncio",
    "chardet",
    "requests.packages.urllib3",
]:
    logging.getLogger(noisy_logger).setLevel(logging.CRITICAL)
    logging.getLogger(noisy_logger).propagate = False

# Also suppress root logger to CRITICAL for anything not our own
logging.getLogger().setLevel(logging.CRITICAL)


# Directory to scan
MUSIC_DIR = "/storage/music2/completed/FLAC/review"
REPORT_CSV = "cover_art_report.csv"
AUDIO_EXTS = {".flac", ".mp3", ".m4a"}

console = Console()

# MusicBrainz API helpers

# Limit concurrent MusicBrainz requests
MUSICBRAINZ_SEMAPHORE = asyncio.Semaphore(1)


def search_musicbrainz_cover(artist, album, max_retries=4):
    url = f"https://musicbrainz.org/ws/2/release-group/?query=artist:{artist} AND release:{album}&fmt=json"
    headers = {"User-Agent": "cover-art-script/1.0"}
    delay = 1.5
    for attempt in range(1, max_retries + 1):
        # Limit concurrency
        loop = asyncio.get_event_loop()
        if MUSICBRAINZ_SEMAPHORE.locked():
            loop.run_until_complete(MUSICBRAINZ_SEMAPHORE.acquire())
        else:
            MUSICBRAINZ_SEMAPHORE.acquire()
        try:
            resp = requests.get(url, headers=headers)
            if resp.status_code == 503:
                console.print(f"[yellow]MusicBrainz 503 error, retrying (attempt {attempt})...[/yellow]")
                time.sleep(delay + random.uniform(0, 0.5))
                delay *= 2
                continue
            if resp.status_code != 200:
                console.print(f"[red]MusicBrainz API error: {resp.status_code}[/red]")
                return None
            try:
                data = resp.json()
            except Exception as e:
                console.print(f"[red]MusicBrainz API returned invalid JSON for {artist} - {album}: {e}[/red]")
                return None
            if not data.get("release-groups"):
                console.print(f"[red]No release-groups found for {artist} - {album}[/red]")
                return None
            rgid = data["release-groups"][0]["id"]
            caa_url = f"https://coverartarchive.org/release-group/{rgid}/front-500"
            caa_resp = requests.get(caa_url)
            if caa_resp.status_code == 200:
                console.print(f"[green]Found cover art on Cover Art Archive for {artist} - {album}[/green]")
                return caa_resp.content
            console.print(f"[red]No cover art found on Cover Art Archive for {artist} - {album}[/red]")
            return None
        finally:
            try:
                MUSICBRAINZ_SEMAPHORE.release()
            except Exception:
                pass
    console.print(f"[red]MusicBrainz API failed after {max_retries} attempts for {artist} - {album}[/red]")
    return None


async def fetch_srutil_cover(sr, artist, song):
    try:
        album = await sr.get_album_by_name(artist, song)
        if not album or not album.get('id'):
            return None
        cover_url = await sr.get_cover_by_album_id(album['id'], 640)
        if cover_url:
            resp = requests.get(cover_url)
            if resp.status_code == 200:
                return resp.content
            else:
                console.print(f"[red]SRUtil: Failed to fetch cover art from URL (status {resp.status_code}): {cover_url}[/red]")
    except Exception as e:
        msg = str(e)
        if "Cannot combine AUTHORIZATION header with AUTH argument" in msg:
            console.print("[red]SRUtil: Skipping due to conflicting authentication method in dependency (AUTHORIZATION header + AUTH argument).[/red]")
        else:
            console.print(f"[red]SRUtil: Exception: {e}[/red]")
    return None


def has_cover(file):
    try:
        f = load_file(file)
        has = bool(f['artwork'].first)
        return has
    except Exception as e:
        console.print(f"[red]Error checking cover art for {file}: {e}[/red]")
        return False


def embed_cover(file, image_bytes):
    try:
        f = load_file(file)
        f['artwork'] = image_bytes
        f.save()
        return True
    except Exception as e:
        console.print(f"[red]Failed to embed cover art into {file}: {e}[/red]")
        return False


def get_artist_album_title(file):
    try:
        f = load_file(file)
        artist = f['artist'].value or ""
        album = f['album'].value or ""
        title = f['title'].value or os.path.splitext(os.path.basename(file))[0]
        return artist, album, title
    except Exception as e:
        console.print(f"[red]Error reading tags for {file}: {e}[/red]")
        return "", "", os.path.splitext(os.path.basename(file))[0]


# Concurrency limit for async processing
CONCURRENCY = 12
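# Lookup order used by process_file below (results cached per (artist, album) in
# process_file.album_art_cache): SRUtil -> MusicBrainz/Cover Art Archive -> iTunes, then the same
# chain again with any trailing parenthetical tag stripped, and finally with trailing suffixes
# (EP, LP, ...) stripped.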

async def process_file(file, sr, table, results, sem):
    async with sem:
        if has_cover(file):
            table.add_row(file, "Already Present", "-")
            results.append([file, "Already Present", "-"])
            return
        artist, album, title = get_artist_album_title(file)
        # Use a global or passed-in cache dict for album art
        if not hasattr(process_file, "album_art_cache"):
            process_file.album_art_cache = {}
        album_key = (artist, album)
        image_bytes = process_file.album_art_cache.get(album_key)
        source = "SRUtil"
        if image_bytes is None:
            image_bytes = await fetch_srutil_cover(sr, artist, album)
            if image_bytes:
                process_file.album_art_cache[album_key] = image_bytes
        if not image_bytes:
            image_bytes = search_musicbrainz_cover(normalize_name(artist), normalize_name(album))
            source = "MusicBrainz"
            if image_bytes:
                process_file.album_art_cache[album_key] = image_bytes
        if not image_bytes:
            image_bytes = search_itunes_cover(artist, album)
            source = "iTunes"
            if image_bytes:
                process_file.album_art_cache[album_key] = image_bytes
        # If all lookups failed, try with parenthetical tag stripped
        if not image_bytes and re.search(r"\([^)]*\)$", album):
            cleaned_album = strip_album_tags(album)
            if cleaned_album and cleaned_album != album:
                cleaned_key = (artist, cleaned_album)
                image_bytes = process_file.album_art_cache.get(cleaned_key)
                if image_bytes is None:
                    image_bytes = await fetch_srutil_cover(sr, artist, cleaned_album)
                    if image_bytes:
                        process_file.album_art_cache[cleaned_key] = image_bytes
                if not image_bytes:
                    image_bytes = search_musicbrainz_cover(normalize_name(artist), normalize_name(cleaned_album))
                    source = "MusicBrainz (stripped)"
                    if image_bytes:
                        process_file.album_art_cache[cleaned_key] = image_bytes
                if not image_bytes:
                    image_bytes = search_itunes_cover(artist, cleaned_album)
                    source = "iTunes (stripped)"
                    if image_bytes:
                        process_file.album_art_cache[cleaned_key] = image_bytes

        # If still not found, try with common suffixes (EP, LP, etc.) stripped from album name
        if not image_bytes:
            suffix_stripped_album = strip_album_suffix(album)
            if suffix_stripped_album and suffix_stripped_album != album:
                suffix_key = (artist, suffix_stripped_album)
                image_bytes = process_file.album_art_cache.get(suffix_key)
                if image_bytes is None:
                    image_bytes = await fetch_srutil_cover(sr, artist, suffix_stripped_album)
                    if image_bytes:
                        process_file.album_art_cache[suffix_key] = image_bytes
                if not image_bytes:
                    image_bytes = search_musicbrainz_cover(normalize_name(artist), normalize_name(suffix_stripped_album))
                    source = "MusicBrainz (suffix-stripped)"
                    if image_bytes:
                        process_file.album_art_cache[suffix_key] = image_bytes
                if not image_bytes:
                    image_bytes = search_itunes_cover(artist, suffix_stripped_album)
                    source = "iTunes (suffix-stripped)"
                    if image_bytes:
                        process_file.album_art_cache[suffix_key] = image_bytes
        if isinstance(image_bytes, bytes):
            ok = embed_cover(file, image_bytes)
            status = "Embedded" if ok else "Failed to Embed"
            if ok:
                console.print(f"[green]Embedded cover art from {source}:[/green] {file}")
            else:
                console.print(f"[red]Failed to embed cover art ({source}):[/red] {file}")
        elif image_bytes:
            status = "Failed to Embed (not bytes)"
            console.print(f"[red]Failed to embed cover art (not bytes) ({source}):[/red] {file}")
        else:
            status = "Not Found"
            source = "-"
            console.print(f"[red]No cover art found:[/red] {file}")
        table.add_row(file, status, source)
        results.append([file, status, source])

async def main():
    console.print(f"[bold blue]Scanning directory: {MUSIC_DIR}[/bold blue]")
    sr = SRUtil()
    results = []
    files = []
    for root, _, filenames in os.walk(MUSIC_DIR):
        for fn in filenames:
            if os.path.splitext(fn)[1].lower() in AUDIO_EXTS:
                file_path = os.path.join(root, fn)
                files.append(file_path)

    table = Table(title="Cover Art Embedding Report")
    table.add_column("File", style="cyan", overflow="fold")
    table.add_column("Status", style="green")
    table.add_column("Source", style="magenta")

    sem = asyncio.Semaphore(CONCURRENCY)

    async def worker(file, sr, table, results, sem, progress, task_id):
        await process_file(file, sr, table, results, sem)
        progress.update(task_id, advance=1)

    with Progress(
        TextColumn("[progress.description]{task.description}"),
        BarColumn(),
        TaskProgressColumn(),
        TimeElapsedColumn(),
    ) as progress:
        task_id = progress.add_task("Processing files...", total=len(files))
        # Schedule all workers
        await asyncio.gather(*(worker(file, sr, table, results, sem, progress, task_id) for file in files))

    # Print summary table and CSV after progress bar
    console.print(table)
    with open(REPORT_CSV, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["File", "Status", "Source"])
        writer.writerows(results)
    console.print(f"[bold green]CSV report written to {REPORT_CSV}[/bold green]")


if __name__ == "__main__":
    asyncio.run(main())
test/test_search_track.py (new file, 23 lines)
@@ -0,0 +1,23 @@
import asyncio
import logging
import sys
sys.path.insert(0, "..")
from utils.sr_wrapper import SRUtil

# logging.getLogger("sr_wrapper").propagate = False
logger = logging.getLogger()
logger.setLevel(logging.CRITICAL)


async def main():
    sr = SRUtil()
    artist, album = "Kadavar - The Sacrament Of Sin".split(" - ")
    search_res = await sr.get_album_by_name(artist[:8], album)
    logging.critical("Search result: %s", search_res)
    album = search_res
    _cover = await sr.get_cover_by_album_id(album.get('id'), 640)
    # cover = sr._get_tidal_cover_url(album.get('cover'), 640)
    logging.critical("Result: %s, Cover: %s", album, _cover)
    return


asyncio.run(main())
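Both new test scripts prepend ".." to sys.path before importing utils.sr_wrapper, so they are meant
to be run from inside the test/ directory (assumed invocation, not stated in the commit):

    cd test
    python add_cover_art.py
    python test_search_track.py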
@@ -7,20 +7,23 @@ import traceback
 import uuid
 import subprocess
 import shutil
-import re
 from pathlib import Path
+from typing import Optional
 from urllib.parse import urlparse, unquote
 import aiohttp
-from datetime import datetime
+from datetime import datetime, timezone
 from mediafile import MediaFile  # type: ignore[import]
 from rq import get_current_job
 from utils.sr_wrapper import SRUtil
+from dotenv import load_dotenv
+import re
 
 # ---------- Config ----------
 ROOT_DIR = Path("/storage/music2")
 MAX_RETRIES = 5
 THROTTLE_MIN = 1.0
 THROTTLE_MAX = 3.5
+DISCORD_WEBHOOK = os.getenv("TRIP_WEBHOOK_URI", "").strip()
 
 HEADERS = {
     "User-Agent": (
@@ -38,26 +41,71 @@ logging.basicConfig(
     format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
 )
 
+load_dotenv()
+
 sr = SRUtil()
 
 
+# ---------- Discord helper ----------
+async def discord_notify(webhook_url: str, title: str, description: str, target: Optional[str] = None, color: int = 0x00FF00):
+    embed = {
+        "title": title,
+        "description": description[:1900] if description else "",
+        "color": color,
+        "timestamp": datetime.now(timezone.utc).isoformat(),
+    }
+    if target:
+        embed["fields"] = [{"name": "Target", "value": str(target), "inline": True}]
+
+    payload = {
+        "embeds": [embed],
+    }
+
+    while True:  # permanent retry
+        try:
+            async with aiohttp.ClientSession() as session:
+                async with session.post(webhook_url, json=payload, timeout=aiohttp.ClientTimeout(total=10)) as resp:
+                    if resp.status >= 400:
+                        text = await resp.text()
+                        raise RuntimeError(f"Discord webhook failed ({resp.status}): {text}")
+            break
+        except Exception as e:
+            print(f"Discord send failed, retrying: {e}")
+            await asyncio.sleep(5)
+
+
+def send_log_to_discord(message: str, level: str, target: Optional[str] = None):
+    colors = {"WARNING": 0xFFA500, "ERROR": 0xFF0000, "CRITICAL": 0xFF0000}
+    color = colors.get(level.upper(), 0xFFFF00)
+
+    async def _send():
+        await discord_notify(
+            webhook_url=DISCORD_WEBHOOK,
+            title=f"{level} in bulk_download",
+            description=message,
+            target=target,
+            color=color
+        )
+
+    try:
+        asyncio.get_running_loop()
+        # already in an event loop — schedule a task
+        asyncio.create_task(_send())
+    except RuntimeError:
+        # not in an event loop — safe to run
+        asyncio.run(_send())
+
+
 # ---------- Helpers ----------
 
 
 def tag_with_mediafile(file_path: str, meta: dict):
     f = MediaFile(file_path)
 
-    # --- Helper to safely set textual/number fields ---
     def safe_set(attr, value, default=None, cast=None):
         if value is None:
             value = default
         if value is not None:
-            if cast is not None:
+            if cast:
                 setattr(f, attr, cast(value))
             else:
                 setattr(f, attr, str(value))
 
-    # --- Basic textual metadata ---
     safe_set("title", meta.get("title"), default="Unknown Title")
     safe_set("artist", meta.get("artist"), default="Unknown Artist")
     safe_set("albumartist", meta.get("album_artist"), default="Unknown Artist")
@@ -66,8 +114,6 @@ def tag_with_mediafile(file_path: str, meta: dict):
     safe_set("disc", meta.get("disc_number"), default=0, cast=int)
     safe_set("isrc", meta.get("isrc"), default="")
     safe_set("bpm", meta.get("bpm"), default=0, cast=int)
-
-    # --- Release date ---
     release_date_str = meta.get("release_date")
     release_date_obj = None
     if release_date_str:
@@ -75,34 +121,15 @@ def tag_with_mediafile(file_path: str, meta: dict):
             release_date_obj = datetime.fromisoformat(release_date_str).date()
         except ValueError:
             try:
-                # fallback if only year string
                 release_date_obj = datetime(int(release_date_str[:4]), 1, 1).date()
             except Exception:
                 pass
     if release_date_obj:
         f.date = release_date_obj
 
-    # --- Save all tags ---
     f.save()
 
 
-def cleanup_empty_dirs(root: Path):
-    """
-    Recursively remove any directories under root that contain no files
-    (empty or only empty subdirectories).
-    """
-    for dirpath, dirnames, filenames in os.walk(root, topdown=False):
-        p = Path(dirpath)
-        has_file = any(f.is_file() for f in p.rglob("*"))
-        if not has_file:
-            try:
-                p.rmdir()
-            except Exception:
-                pass
-
-
 def sanitize_filename(name: str) -> str:
-    """Make a string safe for file/dir names."""
     if not name:
         return "Unknown"
     name = name.replace("/", "-").replace("\\", "-")
@@ -113,18 +140,12 @@ def sanitize_filename(name: str) -> str:
 
 
 def ensure_unique_path(p: Path) -> Path:
-    """
-    Ensure the given file or directory path is unique *within its parent folder*.
-    Only appends (2), (3)... if a real conflict exists in that folder.
-    """
     parent = p.parent
     stem, suffix = p.stem, p.suffix
     existing = {f.name for f in parent.glob(f"*{suffix}") if f.is_file()}
-
     candidate = f"{stem}{suffix}"
     if candidate not in existing:
         return parent / candidate
-
     counter = 2
     while True:
         candidate = f"{stem} ({counter}){suffix}"
@@ -133,7 +154,7 @@ def ensure_unique_path(p: Path) -> Path:
         counter += 1
 
 
-# ---------- Job ----------
+# ---------- bulk_download ----------
 def bulk_download(track_list: list, quality: str = "FLAC"):
     """
     RQ job:
@@ -142,9 +163,11 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
     - uses SR metadata to name/organize files
     - creates ONE tarball for all tracks
     - returns [tarball_path]
+    - sends relevant messages to Discord
     """
     job = get_current_job()
     job_id = job.id if job else uuid.uuid4().hex
+    target = job.meta.get("target") if job else None
     staging_root = ROOT_DIR / job_id
 
     if job:
@@ -156,30 +179,27 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
             job.meta["status"] = "Started"
             job.save_meta()
         except Exception as e:
-            logging.warning("Failed to init job.meta: %s", e)
+            send_log_to_discord(f"Failed to init job.meta: {e}", "WARNING", target)
 
+    # Job started Discord message
+    asyncio.run(discord_notify(
+        DISCORD_WEBHOOK,
+        title=f"Job Started: {job_id}",
+        description=f"Processing `{len(track_list)}` track(s)",
+        target=target,
+        color=0x00FFFF
+    ))
+
     async def process_tracks():
         per_track_meta = []
         all_final_files = []
         all_artists = set()
 
         (ROOT_DIR / "completed").mkdir(parents=True, exist_ok=True)
 
         async with aiohttp.ClientSession(headers=HEADERS) as session:
             total = len(track_list or [])
-            logging.critical("Total tracks to process: %s", total)
-            if job:
-                job.meta["progress"] = 0
-                job.save_meta()
-
             for i, track_id in enumerate(track_list or []):
-                track_info = {
-                    "track_id": str(track_id),
-                    "status": "Pending",
-                    "file_path": None,
-                    "error": None,
-                    "attempts": 0,
-                }
+                track_info = {"track_id": str(track_id), "status": "Pending", "file_path": None, "error": None, "attempts": 0}
                 attempt = 0
 
                 while attempt < MAX_RETRIES:
@@ -195,7 +215,6 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
                         parsed = urlparse(url)
                         clean_path = unquote(parsed.path)
                         ext = Path(clean_path).suffix or ".mp3"
-
                         tmp_file = Path(f"/tmp/{uuid.uuid4().hex}{ext}")
 
                         async with session.get(url) as resp:
@@ -205,7 +224,6 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
                                     f.write(chunk)
 
                         md = await sr.get_metadata_by_track_id(track_id) or {}
-                        logging.info("Metadata for %s: %s", track_id, md)
                         artist_raw = md.get("artist") or "Unknown Artist"
                         album_raw = md.get("album") or "Unknown Album"
                         title_raw = md.get("title") or f"Track {track_id}"
@@ -215,14 +233,11 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
                         title = sanitize_filename(title_raw)
 
                         all_artists.add(artist)
-                        artist_dir = staging_root / artist
-                        album_dir = artist_dir / album
+                        album_dir = staging_root / artist / album
                         album_dir.mkdir(parents=True, exist_ok=True)
 
                         final_file = ensure_unique_path(album_dir / f"{title}{ext}")
-                        tag_with_mediafile(str(tmp_file), md)
 
+                        tag_with_mediafile(str(tmp_file), md)
                         tmp_file.rename(final_file)
                         tmp_file = None
 
@@ -233,60 +248,48 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
 
                         if job:
                             job.meta["progress"] = int(((i + 1) / total) * 100)
+                            job.meta["tracks"] = per_track_meta + [track_info]
                             job.save_meta()
                         break
 
                     except aiohttp.ClientResponseError as e:
+                        msg = f"Track {track_id} attempt {attempt} ClientResponseError: {e}"
+                        send_log_to_discord(msg, "WARNING", target)
                         if e.status == 429:
-                            wait_time = min(60, 2**attempt)  # exponential up to 60s
-                            logging.warning(
-                                "Rate limited (429). Sleeping %s seconds", wait_time
-                            )
+                            wait_time = min(60, 2**attempt)
                             await asyncio.sleep(wait_time)
                         else:
-                            await asyncio.sleep(
-                                random.uniform(THROTTLE_MIN, THROTTLE_MAX)
-                            )
+                            await asyncio.sleep(random.uniform(THROTTLE_MIN, THROTTLE_MAX))
 
                     except Exception as e:
-                        logging.error(
-                            "Track %s attempt %s failed: %s", track_id, attempt, e
-                        )
-                        traceback.print_exc()
+                        tb = traceback.format_exc()
+                        msg = f"Track {track_id} attempt {attempt} failed: {e}\n{tb}"
+                        send_log_to_discord(msg, "ERROR", target)
                         track_info["error"] = str(e)
                         if attempt >= MAX_RETRIES:
                             track_info["status"] = "Failed"
+                            send_log_to_discord(f"Track {track_id} failed after {attempt} attempts", "ERROR", target)
                         await asyncio.sleep(random.uniform(THROTTLE_MIN, THROTTLE_MAX))
 
                     finally:
                         try:
                             if tmp_file and tmp_file.exists():
-                                tmp_file.unlink()
+                                os.remove(tmp_file)
                         except Exception:
                             pass
 
                 per_track_meta.append(track_info)
-                if job:
-                    try:
-                        job.meta["tracks"] = per_track_meta
-                        job.save_meta()
-                    except Exception as e:
-                        logging.warning(
-                            "Failed to update job.meta after track %s: %s", track_id, e
-                        )
-
-                await asyncio.sleep(random.uniform(THROTTLE_MIN, THROTTLE_MAX))
 
         if not all_final_files:
             if job:
-                try:
                 job.meta["tarball"] = None
                 job.meta["status"] = "Failed"
                 job.save_meta()
-                except Exception:
-                    pass
+            send_log_to_discord(f"No tracks were successfully downloaded for job `{job_id}`", "CRITICAL", target)
             return []
 
-        artist_counts: dict[str, int] = {}
+        # Tarball creation
+        artist_counts = {}
         for t in per_track_meta:
            if t["status"] == "Success" and t.get("file_path"):
                 try:
@@ -294,17 +297,10 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
                 except Exception:
                     artist = "Unknown Artist"
                 artist_counts[artist] = artist_counts.get(artist, 0) + 1
-        if artist_counts:
-            top_artist = sorted(artist_counts.items(), key=lambda kv: (-kv[1], kv[0]))[
-                0
-            ][0]
-        else:
-            top_artist = "Unknown Artist"
+        top_artist = sorted(artist_counts.items(), key=lambda kv: (-kv[1], kv[0]))[0][0] if artist_counts else "Unknown Artist"
 
         combined_artist = sanitize_filename(top_artist)
         staged_tarball = staging_root / f"{combined_artist}.tar.gz"
-        # Ensure uniqueness (Windows-style padding) within the parent folder
         counter = 1
         base_name = staged_tarball.stem
         while staged_tarball.exists():
@@ -315,26 +311,18 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
         final_tarball.parent.mkdir(parents=True, exist_ok=True)
 
         if job:
-            try:
             job.meta["status"] = "Compressing"
             job.save_meta()
-            except Exception:
-                pass
 
         logging.info("Creating tarball: %s", staged_tarball)
-        def _create_tar_sync():
+        await discord_notify(DISCORD_WEBHOOK,
+                             title=f"Compressing: Job {job_id}",
+                             description=f"Creating tarball (`{len(track_list)}` track(s)).\nStaging path: {staged_tarball}",
+                             color=0xFFA500,
+                             target=target)
         try:
             subprocess.run(
-                [
-                    "tar",
-                    "-I",
-                    "pigz -9",
-                    "-cf",
-                    str(staged_tarball),
-                    "-C",
-                    str(staging_root),
-                ]
+                ["tar", "-I", "pigz -9", "-cf", str(staged_tarball), "-C", str(staging_root)]
                 + [str(f.relative_to(staging_root)) for f in all_final_files],
                 check=True,
             )
@@ -344,7 +332,7 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
             except Exception:
                 pass
         except FileNotFoundError:
-            logging.warning("pigz not available, falling back to tarfile (slower).")
+            send_log_to_discord("pigz not available, falling back to tarfile (slower).", "WARNING", target)
             with tarfile.open(staged_tarball, "w:gz") as tar:
                 for f in all_final_files:
                     try:
@@ -357,27 +345,18 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
                     except Exception:
                         pass
 
-        await asyncio.to_thread(_create_tar_sync)
-
         if not staged_tarball.exists():
-            logging.error("Tarball was not created: %s", staged_tarball)
+            send_log_to_discord(f"Tarball was not created: `{staged_tarball}`", "CRITICAL", target)
             if job:
-                try:
                 job.meta["status"] = "compress_failed"
                 job.save_meta()
-                except Exception:
-                    pass
             return []
 
-        logging.critical("Tarball created: %s", staged_tarball)
-
         try:
             staged_tarball.rename(final_tarball)
         except Exception:
             shutil.move(str(staged_tarball), str(final_tarball))
 
-        logging.critical("Tarball finalized: %s", final_tarball)
-
         await asyncio.to_thread(shutil.rmtree, staging_root, ignore_errors=True)
 
         if job:
@@ -386,6 +365,15 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
             job.meta["status"] = "Completed"
             job.save_meta()
 
+        # Job completed Discord message
+        await discord_notify(
+            DISCORD_WEBHOOK,
+            title=f"Job Completed: {job_id}",
+            description=f"Processed `{len(track_list)}` track(s). Tarball: `{final_tarball}`",
+            target=target,
+            color=0x00FF00
+        )
+
         return [str(final_tarball)]
 
     loop = asyncio.new_event_loop()
@@ -393,9 +381,9 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
     try:
         return loop.run_until_complete(process_tracks())
     except Exception as e:
+        send_log_to_discord(f"bulk_download failed: {e}\n{traceback.format_exc()}", "CRITICAL", target)
         if job:
             job.meta["status"] = "Failed"
             job.save_meta()
-        logging.critical("Exception: %s", str(e))
     finally:
         loop.close()
@@ -2,7 +2,17 @@ from typing import Optional, Any
 from uuid import uuid4
 from urllib.parse import urlparse
 import hashlib
+import traceback
 import logging
+# Suppress all logging output from this module and its children
+for name in [__name__, "utils.sr_wrapper"]:
+    logger = logging.getLogger(name)
+    logger.setLevel(logging.CRITICAL)
+    logger.propagate = False
+    for handler in logger.handlers:
+        handler.setLevel(logging.CRITICAL)
+# Also set the root logger to CRITICAL as a last resort (may affect global logging)
+logging.getLogger().setLevel(logging.CRITICAL)
 import random
 import asyncio
 import os
@@ -11,6 +21,8 @@ import time
 from streamrip.client import TidalClient  # type: ignore
 from streamrip.config import Config as StreamripConfig  # type: ignore
 from dotenv import load_dotenv
+from rapidfuzz import fuzz
+
 
 
 load_dotenv()
@@ -64,6 +76,17 @@ class SRUtil:
         self.last_request_time = time.time()
         return result
 
+    def is_fuzzy_match(self, expected, actual, threshold=80):
+        if not expected or not actual:
+            return False
+        return fuzz.token_set_ratio(expected.lower(), actual.lower()) >= threshold
+
+    def is_metadata_match(self, expected_artist, expected_album, expected_title, found_artist, found_album, found_title, threshold=80):
+        artist_match = self.is_fuzzy_match(expected_artist, found_artist, threshold)
+        album_match = self.is_fuzzy_match(expected_album, found_album, threshold) if expected_album else True
+        title_match = self.is_fuzzy_match(expected_title, found_title, threshold)
+        return artist_match and album_match and title_match
+
     def dedupe_by_key(self, key: str, entries: list[dict]) -> list[dict]:
         deduped = {}
         for entry in entries:
@@ -78,6 +101,23 @@ class SRUtil:
         m, s = divmod(seconds, 60)
         return f"{m}:{s:02}"
 
+    def _get_tidal_cover_url(self, uuid, size):
+        """Generate a tidal cover url.
+
+        :param uuid: VALID uuid string
+        :param size:
+        """
+        TIDAL_COVER_URL = "https://resources.tidal.com/images/{uuid}/{width}x{height}.jpg"
+        possibles = (80, 160, 320, 640, 1280)
+        assert size in possibles, f"size must be in {possibles}"
+        return TIDAL_COVER_URL.format(
+            uuid=uuid.replace("-", "/"),
+            height=size,
+            width=size,
+        )
+
+
     def combine_album_track_metadata(
         self, album_json: dict | None, track_json: dict
     ) -> dict:
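For reference, with size=640 the helper added above produces a URL of the form (editor's sketch,
UUID value hypothetical): https://resources.tidal.com/images/aa11/bb22/cc33/640x640.jpg, i.e. the
cover UUID's hyphens become path separators and the chosen size fills both dimensions.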
@@ -140,32 +180,33 @@ class SRUtil:
         ]
 
     async def get_artists_by_name(self, artist_name: str) -> Optional[list]:
-        """Get artist(s) by name.
-        Args:
-            artist_name (str): The name of the artist.
-        Returns:
-            Optional[dict]: The artist details or None if not found.
-        """
-
-        try:
-            await self.streamrip_client.login()
-        except Exception as e:
-            logging.info("Login Exception: %s", str(e))
-            pass
+        """Get artist(s) by name. Retry login only on authentication failure. Rate limit and retry on 400/429."""
+        import asyncio
         artists_out: list[dict] = []
+        max_retries = 4
+        delay = 1.0
+        for attempt in range(max_retries):
             try:
                 artists = await self.streamrip_client.search(
                     media_type="artist", query=artist_name
                 )
+                break
             except AttributeError:
                 await self.streamrip_client.login()
-                artists = await self.streamrip_client.search(
-                    media_type="artist", query=artist_name
-                )
-        logging.critical("Artists output: %s", artists)
+                if attempt == max_retries - 1:
+                    return None
+            except Exception as e:
+                msg = str(e)
+                if ("400" in msg or "429" in msg) and attempt < max_retries - 1:
+                    await asyncio.sleep(delay)
+                    delay *= 2
+                    continue
+                else:
+                    return None
+        else:
+            return None
         artists = artists[0].get("items", [])
         if not artists:
-            logging.warning("No artist found for name: %s", artist_name)
             return None
         artists_out = [
             {
@@ -179,26 +220,33 @@ class SRUtil:
         return artists_out
 
     async def get_albums_by_artist_id(self, artist_id: int) -> Optional[list | dict]:
-        """Get albums by artist ID
-        Args:
-            artist_id (int): The ID of the artist.
-        Returns:
-            Optional[list[dict]]: List of albums or None if not found.
-        """
+        """Get albums by artist ID. Retry login only on authentication failure. Rate limit and retry on 400/429."""
+        import asyncio
         artist_id_str: str = str(artist_id)
         albums_out: list[dict] = []
+        max_retries = 4
+        delay = 1.0
+        for attempt in range(max_retries):
             try:
-                await self.streamrip_client.login()
                 metadata = await self.streamrip_client.get_metadata(
                     item_id=artist_id_str, media_type="artist"
                 )
+                break
             except AttributeError:
                 await self.streamrip_client.login()
-                metadata = await self.streamrip_client.get_metadata(
-                    item_id=artist_id_str, media_type="artist"
-                )
+                if attempt == max_retries - 1:
+                    return None
+            except Exception as e:
+                msg = str(e)
+                if ("400" in msg or "429" in msg) and attempt < max_retries - 1:
+                    await asyncio.sleep(delay)
+                    delay *= 2
+                    continue
+                else:
+                    return None
+        else:
+            return None
         if not metadata:
-            logging.warning("No metadata found for artist ID: %s", artist_id)
             return None
         albums = self.dedupe_by_key("title", metadata.get("albums", []))
         albums_out = [
@@ -211,10 +259,66 @@ class SRUtil:
             for album in albums
             if "title" in album and "id" in album and "artists" in album
         ]
 
-        logging.debug("Retrieved albums: %s", albums_out)
         return albums_out
 
+    async def get_album_by_name(self, artist: str, album: str) -> Optional[dict]:
+        """Get album by artist and album name using artist ID and fuzzy matching. Try first 8 chars, then 12 if no match. Notify on success."""
+        # Notification moved to add_cover_art.py as requested
+        for trunc in (8, 12):
+            search_artist = artist[:trunc]
+            artists = await self.get_artists_by_name(search_artist)
+            if not artists:
+                continue
+            best_artist = None
+            best_artist_score = 0
+            for a in artists:
+                score = fuzz.token_set_ratio(artist, a["artist"])
+                if score > best_artist_score:
+                    best_artist = a
+                    best_artist_score = int(score)
+            if not best_artist or best_artist_score < 85:
+                continue
+            artist_id = best_artist["id"]
+            albums = await self.get_albums_by_artist_id(artist_id)
+            if not albums:
+                continue
+            best_album = None
+            best_album_score = 0
+            for alb in albums:
+                score = fuzz.token_set_ratio(album, alb["album"])
+                if score > best_album_score:
+                    best_album = alb
+                    best_album_score = int(score)
+            if best_album and best_album_score >= 85:
+                return best_album
+        return None
+
+    async def get_cover_by_album_id(self, album_id: int, size: int = 640) -> Optional[str]:
+        """Get cover URL by album ID. Retry login only on authentication failure."""
+        if size not in [80, 160, 320, 640, 1280]:
+            return None
+        album_id_str: str = str(album_id)
+        for attempt in range(2):
+            try:
+                metadata = await self.streamrip_client.get_metadata(
+                    item_id=album_id_str, media_type="album"
+                )
+                break
+            except AttributeError:
+                await self.streamrip_client.login()
+                if attempt == 1:
+                    return None
+        else:
+            return None
+        if not metadata:
+            return None
+        cover_id = metadata.get("cover")
+        if not cover_id:
+            return None
+        cover_url = self._get_tidal_cover_url(cover_id, size)
+        return cover_url
+
+
     async def get_tracks_by_album_id(
         self, album_id: int, quality: str = "FLAC"
     ) -> Optional[list | dict]:
@@ -247,7 +351,7 @@ class SRUtil:
         ]
         return tracks_out
 
-    async def get_tracks_by_artist_song(self, artist: str, song: str) -> Optional[list]:
+    async def get_tracks_by_artist_song(self, artist: str, song: str, n: int = 0) -> Optional[list]:
         """Get track by artist and song name
         Args:
             artist (str): The name of the artist.
@@ -256,7 +360,23 @@ class SRUtil:
             Optional[dict]: The track details or None if not found.
         TODO: Reimplement using StreamRip
         """
+        if not self.streamrip_client.logged_in:
+            await self.streamrip_client.login()
+        try:
+            search_res = await self.streamrip_client.search(media_type="track",
+                                                             query=f"{artist} - {song}",
+                                                             )
+            logging.critical("Result: %s", search_res)
+            return search_res[0].get('items')
+        except Exception as e:
+            traceback.print_exc()
+            logging.critical("Search Exception: %s", str(e))
+            if n < 3:
+                n+=1
+                return await self.get_tracks_by_artist_song(artist, song, n)
+        finally:
             return []
+            # return []
 
     async def get_stream_url_by_track_id(
         self, track_id: int, quality: str = "FLAC"