2025-09-18 08:13:21 -04:00
parent 3b74333b96
commit e1194475b3
5 changed files with 661 additions and 188 deletions

View File

@@ -7,20 +7,23 @@ import traceback
import uuid
import subprocess
import shutil
import re
from pathlib import Path
from typing import Optional
from urllib.parse import urlparse, unquote
import aiohttp
from datetime import datetime, timezone
from mediafile import MediaFile # type: ignore[import]
from rq import get_current_job
from utils.sr_wrapper import SRUtil
from dotenv import load_dotenv
# ---------- Config ----------
ROOT_DIR = Path("/storage/music2")
MAX_RETRIES = 5
THROTTLE_MIN = 1.0
THROTTLE_MAX = 3.5
DISCORD_WEBHOOK = os.getenv("TRIP_WEBHOOK_URI", "").strip()
HEADERS = {
"User-Agent": (
@@ -38,26 +41,71 @@ logging.basicConfig(
format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
)
load_dotenv()
sr = SRUtil()
# ---------- Discord helper ----------
async def discord_notify(webhook_url: str, title: str, description: str, target: Optional[str] = None, color: int = 0x00FF00):
embed = {
"title": title,
"description": description[:1900] if description else "",
"color": color,
"timestamp": datetime.now(timezone.utc).isoformat(),
}
if target:
embed["fields"] = [{"name": "Target", "value": str(target), "inline": True}]
payload = {
"embeds": [embed],
}
while True: # permanent retry
try:
async with aiohttp.ClientSession() as session:
async with session.post(webhook_url, json=payload, timeout=aiohttp.ClientTimeout(total=10)) as resp:
if resp.status >= 400:
text = await resp.text()
raise RuntimeError(f"Discord webhook failed ({resp.status}): {text}")
break
except Exception as e:
print(f"Discord send failed, retrying: {e}")
await asyncio.sleep(5)
def send_log_to_discord(message: str, level: str, target: Optional[str] = None):
colors = {"WARNING": 0xFFA500, "ERROR": 0xFF0000, "CRITICAL": 0xFF0000}
color = colors.get(level.upper(), 0xFFFF00)
async def _send():
await discord_notify(
webhook_url=DISCORD_WEBHOOK,
title=f"{level} in bulk_download",
description=message,
target=target,
color=color
)
try:
asyncio.get_running_loop()
# already in an event loop — schedule a task
asyncio.create_task(_send())
except RuntimeError:
# not in an event loop — safe to run
asyncio.run(_send())
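# Example (illustrative sketch, not part of this commit): the get_running_loop()
# probe above is what makes send_log_to_discord safe to call from both sync and
# async call sites. Both callers below are hypothetical.
def _example_sync_caller():
    # No loop is running here, so the helper falls through to asyncio.run().
    send_log_to_discord("disk full", "ERROR", target="job-1")

async def _example_async_caller():
    # A loop is already running, so the helper schedules a fire-and-forget task.
    send_log_to_discord("rate limited", "WARNING", target="job-1")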
# ---------- Helpers ----------
def tag_with_mediafile(file_path: str, meta: dict):
f = MediaFile(file_path)
# --- Helper to safely set textual/number fields ---
def safe_set(attr, value, default=None, cast=None):
if value is None:
value = default
if value is not None:
if cast is not None:
setattr(f, attr, cast(value))
else:
setattr(f, attr, str(value))
# --- Basic textual metadata ---
safe_set("title", meta.get("title"), default="Unknown Title")
safe_set("artist", meta.get("artist"), default="Unknown Artist")
safe_set("albumartist", meta.get("album_artist"), default="Unknown Artist")
@@ -66,8 +114,6 @@ def tag_with_mediafile(file_path: str, meta: dict):
safe_set("disc", meta.get("disc_number"), default=0, cast=int)
safe_set("isrc", meta.get("isrc"), default="")
safe_set("bpm", meta.get("bpm"), default=0, cast=int)
# --- Release date ---
release_date_str = meta.get("release_date")
release_date_obj = None
if release_date_str:
@@ -75,34 +121,15 @@ def tag_with_mediafile(file_path: str, meta: dict):
release_date_obj = datetime.fromisoformat(release_date_str).date()
except ValueError:
try:
# fallback if only year string
release_date_obj = datetime(int(release_date_str[:4]), 1, 1).date()
except Exception:
pass
if release_date_obj:
f.date = release_date_obj
# --- Save all tags ---
f.save()
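# Example (illustrative, not part of this commit): a minimal meta dict; any
# missing keys fall back to the safe_set defaults above. The path and values
# are hypothetical; release_date accepts a full ISO date or a bare year.
#
#   tag_with_mediafile("/tmp/example.flac", {
#       "title": "Some Song",
#       "artist": "Some Artist",
#       "album_artist": "Some Artist",
#       "release_date": "2021-05-01",
#   })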
def cleanup_empty_dirs(root: Path):
"""
Recursively remove any directories under root that contain no files
(empty or only empty subdirectories).
"""
for dirpath, dirnames, filenames in os.walk(root, topdown=False):
p = Path(dirpath)
has_file = any(f.is_file() for f in p.rglob("*"))
if not has_file:
try:
p.rmdir()
except Exception:
pass
def sanitize_filename(name: str) -> str:
"""Make a string safe for file/dir names."""
if not name:
return "Unknown"
name = name.replace("/", "-").replace("\\", "-")
@@ -113,18 +140,12 @@ def sanitize_filename(name: str) -> str:
def ensure_unique_path(p: Path) -> Path:
"""
Ensure the given file or directory path is unique *within its parent folder*.
Only appends (2), (3)... if a real conflict exists in that folder.
"""
parent = p.parent
stem, suffix = p.stem, p.suffix
existing = {f.name for f in parent.glob(f"*{suffix}") if f.is_file()}
candidate = f"{stem}{suffix}"
if candidate not in existing:
return parent / candidate
counter = 2
while True:
candidate = f"{stem} ({counter}){suffix}"
@@ -133,7 +154,7 @@ def ensure_unique_path(p: Path) -> Path:
counter += 1
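# Example (illustrative, not part of this commit): if "Song.flac" already exists
# in album_dir (hypothetical), repeated calls yield numbered siblings:
#
#   ensure_unique_path(album_dir / "Song.flac")  # -> album_dir / "Song (2).flac"
#   ensure_unique_path(album_dir / "Song.flac")  # -> "Song (3).flac" once (2) exists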
# ---------- bulk_download ----------
def bulk_download(track_list: list, quality: str = "FLAC"):
"""
RQ job:
@@ -142,9 +163,11 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
- uses SR metadata to name/organize files
- creates ONE tarball for all tracks
- returns [tarball_path]
- sends relevant messages to Discord
"""
job = get_current_job()
job_id = job.id if job else uuid.uuid4().hex
target = job.meta.get("target") if job else None
staging_root = ROOT_DIR / job_id
if job:
@@ -156,30 +179,27 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
job.meta["status"] = "Started"
job.save_meta()
except Exception as e:
logging.warning("Failed to init job.meta: %s", e)
send_log_to_discord(f"Failed to init job.meta: {e}", "WARNING", target)
# Job started Discord message
asyncio.run(discord_notify(
DISCORD_WEBHOOK,
title=f"Job Started: {job_id}",
description=f"Processing `{len(track_list)}` track(s)",
target=target,
color=0x00FFFF
))
async def process_tracks():
per_track_meta = []
all_final_files = []
all_artists = set()
(ROOT_DIR / "completed").mkdir(parents=True, exist_ok=True)
async with aiohttp.ClientSession(headers=HEADERS) as session:
total = len(track_list or [])
logging.critical("Total tracks to process: %s", total)
if job:
job.meta["progress"] = 0
job.save_meta()
for i, track_id in enumerate(track_list or []):
track_info = {
"track_id": str(track_id),
"status": "Pending",
"file_path": None,
"error": None,
"attempts": 0,
}
track_info = {"track_id": str(track_id), "status": "Pending", "file_path": None, "error": None, "attempts": 0}
attempt = 0
while attempt < MAX_RETRIES:
@@ -195,7 +215,6 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
parsed = urlparse(url)
clean_path = unquote(parsed.path)
ext = Path(clean_path).suffix or ".mp3"
tmp_file = Path(f"/tmp/{uuid.uuid4().hex}{ext}")
async with session.get(url) as resp:
@@ -205,7 +224,6 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
f.write(chunk)
md = await sr.get_metadata_by_track_id(track_id) or {}
logging.info("Metadata for %s: %s", track_id, md)
artist_raw = md.get("artist") or "Unknown Artist"
album_raw = md.get("album") or "Unknown Album"
title_raw = md.get("title") or f"Track {track_id}"
@@ -215,14 +233,11 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
title = sanitize_filename(title_raw)
all_artists.add(artist)
album_dir = staging_root / artist / album
album_dir.mkdir(parents=True, exist_ok=True)
final_file = ensure_unique_path(album_dir / f"{title}{ext}")
tag_with_mediafile(str(tmp_file), md)
tmp_file.rename(final_file)
tmp_file = None
@@ -233,60 +248,48 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
if job:
job.meta["progress"] = int(((i + 1) / total) * 100)
job.meta["tracks"] = per_track_meta + [track_info]
job.save_meta()
break
except aiohttp.ClientResponseError as e:
msg = f"Track {track_id} attempt {attempt} ClientResponseError: {e}"
send_log_to_discord(msg, "WARNING", target)
if e.status == 429:
wait_time = min(60, 2**attempt) # exponential up to 60s
logging.warning(
"Rate limited (429). Sleeping %s seconds", wait_time
)
await asyncio.sleep(wait_time)
else:
await asyncio.sleep(random.uniform(THROTTLE_MIN, THROTTLE_MAX))
except Exception as e:
logging.error(
"Track %s attempt %s failed: %s", track_id, attempt, e
)
tb = traceback.format_exc()
msg = f"Track {track_id} attempt {attempt} failed: {e}\n{tb}"
send_log_to_discord(msg, "ERROR", target)
track_info["error"] = str(e)
if attempt >= MAX_RETRIES:
track_info["status"] = "Failed"
send_log_to_discord(f"Track {track_id} failed after {attempt} attempts", "ERROR", target)
await asyncio.sleep(random.uniform(THROTTLE_MIN, THROTTLE_MAX))
finally:
try:
if tmp_file and tmp_file.exists():
tmp_file.unlink()
except Exception:
pass
per_track_meta.append(track_info)
if job:
try:
job.meta["tracks"] = per_track_meta
job.save_meta()
except Exception as e:
logging.warning(
"Failed to update job.meta after track %s: %s", track_id, e
)
await asyncio.sleep(random.uniform(THROTTLE_MIN, THROTTLE_MAX))
if not all_final_files:
if job:
try:
job.meta["tarball"] = None
job.meta["status"] = "Failed"
job.save_meta()
except Exception:
pass
job.meta["tarball"] = None
job.meta["status"] = "Failed"
job.save_meta()
send_log_to_discord(f"No tracks were successfully downloaded for job `{job_id}`", "CRITICAL", target)
return []
# Tarball creation
artist_counts: dict[str, int] = {}
for t in per_track_meta:
if t["status"] == "Success" and t.get("file_path"):
try:
@@ -294,17 +297,10 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
except Exception:
artist = "Unknown Artist"
artist_counts[artist] = artist_counts.get(artist, 0) + 1
if artist_counts:
top_artist = sorted(artist_counts.items(), key=lambda kv: (-kv[1], kv[0]))[
0
][0]
else:
top_artist = "Unknown Artist"
combined_artist = sanitize_filename(top_artist)
staged_tarball = staging_root / f"{combined_artist}.tar.gz"
# Ensure uniqueness within the parent folder (Windows-style " (2)" suffixes)
counter = 1
base_name = staged_tarball.stem
while staged_tarball.exists():
@@ -315,69 +311,52 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
final_tarball.parent.mkdir(parents=True, exist_ok=True)
if job:
try:
job.meta["status"] = "Compressing"
job.save_meta()
except Exception:
pass
job.meta["status"] = "Compressing"
job.save_meta()
logging.info("Creating tarball: %s", staged_tarball)
await discord_notify(DISCORD_WEBHOOK,
title=f"Compressing: Job {job_id}",
description=f"Creating tarball (`{len(track_list)}` track(s)).\nStaging path: {staged_tarball}",
color=0xFFA500,
target=target)
def _create_tar_sync():
try:
subprocess.run(
["tar", "-I", "pigz -9", "-cf", str(staged_tarball), "-C", str(staging_root)]
+ [str(f.relative_to(staging_root)) for f in all_final_files],
check=True,
)
for f in all_final_files:
try:
os.remove(f)
except Exception:
pass
except FileNotFoundError:
send_log_to_discord("pigz not available, falling back to tarfile (slower).", "WARNING", target)
with tarfile.open(staged_tarball, "w:gz") as tar:
for f in all_final_files:
try:
arcname = f.relative_to(staging_root)
except ValueError:
arcname = f.name
tar.add(f, arcname=str(arcname))
try:
os.remove(f)
except Exception:
pass
await asyncio.to_thread(_create_tar_sync)
if not staged_tarball.exists():
logging.error("Tarball was not created: %s", staged_tarball)
send_log_to_discord(f"Tarball was not created: `{staged_tarball}`", "CRITICAL", target)
if job:
try:
job.meta["status"] = "compress_failed"
job.save_meta()
except Exception:
pass
job.meta["status"] = "compress_failed"
job.save_meta()
return []
logging.critical("Tarball created: %s", staged_tarball)
try:
staged_tarball.rename(final_tarball)
except Exception:
shutil.move(str(staged_tarball), str(final_tarball))
logging.critical("Tarball finalized: %s", final_tarball)
await asyncio.to_thread(shutil.rmtree, staging_root, ignore_errors=True)
if job:
@@ -386,6 +365,15 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
job.meta["status"] = "Completed"
job.save_meta()
# Job completed Discord message
await discord_notify(
DISCORD_WEBHOOK,
title=f"Job Completed: {job_id}",
description=f"Processed `{len(track_list)}` track(s). Tarball: `{final_tarball}`",
target=target,
color=0x00FF00
)
return [str(final_tarball)]
loop = asyncio.new_event_loop()
@@ -393,9 +381,9 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
try:
return loop.run_until_complete(process_tracks())
except Exception as e:
send_log_to_discord(f"bulk_download failed: {e}\n{traceback.format_exc()}", "CRITICAL", target)
if job:
job.meta["status"] = "Failed"
job.save_meta()
logging.critical("Exception: %s", str(e))
finally:
loop.close()

View File

@@ -2,7 +2,17 @@ from typing import Optional, Any
from uuid import uuid4
from urllib.parse import urlparse
import hashlib
import traceback
import logging
# Suppress all logging output from this module and its children
for name in [__name__, "utils.sr_wrapper"]:
logger = logging.getLogger(name)
logger.setLevel(logging.CRITICAL)
logger.propagate = False
for handler in logger.handlers:
handler.setLevel(logging.CRITICAL)
# Also set the root logger to CRITICAL as a last resort (may affect global logging)
logging.getLogger().setLevel(logging.CRITICAL)
import random
import asyncio
import os
@@ -11,6 +21,8 @@ import time
from streamrip.client import TidalClient # type: ignore
from streamrip.config import Config as StreamripConfig # type: ignore
from dotenv import load_dotenv
from rapidfuzz import fuzz
load_dotenv()
@@ -62,7 +74,18 @@ class SRUtil:
await asyncio.sleep(self.METADATA_RATE_LIMIT - elapsed)
result = await func(*args, **kwargs)
self.last_request_time = time.time()
return result
def is_fuzzy_match(self, expected, actual, threshold=80):
if not expected or not actual:
return False
return fuzz.token_set_ratio(expected.lower(), actual.lower()) >= threshold
def is_metadata_match(self, expected_artist, expected_album, expected_title, found_artist, found_album, found_title, threshold=80):
artist_match = self.is_fuzzy_match(expected_artist, found_artist, threshold)
album_match = self.is_fuzzy_match(expected_album, found_album, threshold) if expected_album else True
title_match = self.is_fuzzy_match(expected_title, found_title, threshold)
return artist_match and album_match and title_match
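# Example (illustrative, not part of this commit): token_set_ratio ignores word
# order and duplicated tokens, so common display variants clear the default
# threshold of 80:
#
#   fuzz.token_set_ratio("the beatles", "beatles the")       # 100.0
#   fuzz.token_set_ratio("abbey road", "abbey road deluxe")  # 100.0 (subset match)
#   fuzz.token_set_ratio("abbey road", "let it be")          # far below 80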
def dedupe_by_key(self, key: str, entries: list[dict]) -> list[dict]:
deduped = {}
@@ -77,6 +100,23 @@ class SRUtil:
return None
m, s = divmod(seconds, 60)
return f"{m}:{s:02}"
def _get_tidal_cover_url(self, uuid, size):
"""Generate a tidal cover url.
:param uuid: VALID uuid string
:param size:
"""
TIDAL_COVER_URL = "https://resources.tidal.com/images/{uuid}/{width}x{height}.jpg"
possibles = (80, 160, 320, 640, 1280)
assert size in possibles, f"size must be in {possibles}"
return TIDAL_COVER_URL.format(
uuid=uuid.replace("-", "/"),
height=size,
width=size,
)
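# Example (illustrative, not part of this commit): a cover UUID such as
# "0b1c2d3e-4f5a-6b7c-8d9e-0f1a2b3c4d5e" at size 640 maps to
# https://resources.tidal.com/images/0b1c2d3e/4f5a/6b7c/8d9e/0f1a2b3c4d5e/640x640.jpg
# (dashes become path separators; covers are square, so width == height).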
def combine_album_track_metadata(
self, album_json: dict | None, track_json: dict
@@ -140,32 +180,33 @@ class SRUtil:
]
async def get_artists_by_name(self, artist_name: str) -> Optional[list]:
"""Get artist(s) by name.
Args:
artist_name (str): The name of the artist.
Returns:
Optional[dict]: The artist details or None if not found.
"""
try:
await self.streamrip_client.login()
except Exception as e:
logging.info("Login Exception: %s", str(e))
pass
"""Get artist(s) by name. Retry login only on authentication failure. Rate limit and retry on 400/429."""
import asyncio
artists_out: list[dict] = []
max_retries = 4
delay = 1.0
for attempt in range(max_retries):
try:
artists = await self.streamrip_client.search(
media_type="artist", query=artist_name
)
break
except AttributeError:
await self.streamrip_client.login()
if attempt == max_retries - 1:
return None
except Exception as e:
msg = str(e)
if ("400" in msg or "429" in msg) and attempt < max_retries - 1:
await asyncio.sleep(delay)
delay *= 2
continue
else:
return None
else:
return None
artists = artists[0].get("items", [])
if not artists:
logging.warning("No artist found for name: %s", artist_name)
return None
artists_out = [
{
@@ -179,26 +220,33 @@ class SRUtil:
return artists_out
async def get_albums_by_artist_id(self, artist_id: int) -> Optional[list | dict]:
"""Get albums by artist ID
Args:
artist_id (int): The ID of the artist.
Returns:
Optional[list[dict]]: List of albums or None if not found.
"""
"""Get albums by artist ID. Retry login only on authentication failure. Rate limit and retry on 400/429."""
import asyncio
artist_id_str: str = str(artist_id)
albums_out: list[dict] = []
max_retries = 4
delay = 1.0
for attempt in range(max_retries):
try:
metadata = await self.streamrip_client.get_metadata(
item_id=artist_id_str, media_type="artist"
)
break
except AttributeError:
await self.streamrip_client.login()
if attempt == max_retries - 1:
return None
except Exception as e:
msg = str(e)
if ("400" in msg or "429" in msg) and attempt < max_retries - 1:
await asyncio.sleep(delay)
delay *= 2
continue
else:
return None
else:
return None
if not metadata:
logging.warning("No metadata found for artist ID: %s", artist_id)
return None
albums = self.dedupe_by_key("title", metadata.get("albums", []))
albums_out = [
@@ -211,9 +259,65 @@ class SRUtil:
for album in albums
if "title" in album and "id" in album and "artists" in album
]
logging.debug("Retrieved albums: %s", albums_out)
return albums_out
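# The login-on-AttributeError / backoff-on-400-or-429 pattern above repeats in
# several methods. A possible shared helper (a sketch, not part of this commit;
# the name _call_with_retries is made up):
async def _call_with_retries(self, coro_factory, max_retries: int = 4, delay: float = 1.0):
    """Run coro_factory() with re-login on AttributeError and exponential backoff on 400/429."""
    for attempt in range(max_retries):
        try:
            return await coro_factory()
        except AttributeError:
            # Stale or missing session: log in again, then retry.
            await self.streamrip_client.login()
            if attempt == max_retries - 1:
                return None
        except Exception as e:
            msg = str(e)
            if ("400" in msg or "429" in msg) and attempt < max_retries - 1:
                await asyncio.sleep(delay)
                delay *= 2
                continue
            return None
    return None

# Usage sketch:
#   metadata = await self._call_with_retries(
#       lambda: self.streamrip_client.get_metadata(item_id=artist_id_str, media_type="artist")
#   )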
async def get_album_by_name(self, artist: str, album: str) -> Optional[dict]:
"""Get album by artist and album name using artist ID and fuzzy matching. Try first 8 chars, then 12 if no match. Notify on success."""
# Notification moved to add_cover_art.py as requested
for trunc in (8, 12):
search_artist = artist[:trunc]
artists = await self.get_artists_by_name(search_artist)
if not artists:
continue
best_artist = None
best_artist_score = 0
for a in artists:
score = fuzz.token_set_ratio(artist, a["artist"])
if score > best_artist_score:
best_artist = a
best_artist_score = int(score)
if not best_artist or best_artist_score < 85:
continue
artist_id = best_artist["id"]
albums = await self.get_albums_by_artist_id(artist_id)
if not albums:
continue
best_album = None
best_album_score = 0
for alb in albums:
score = fuzz.token_set_ratio(album, alb["album"])
if score > best_album_score:
best_album = alb
best_album_score = int(score)
if best_album and best_album_score >= 85:
return best_album
return None
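# Example (illustrative, not part of this commit): a hypothetical lookup. The
# first pass searches with artist[:8]; if nothing clears the 85-point fuzzy
# threshold, a second pass widens to artist[:12]:
#
#   album = await sr.get_album_by_name("Fleetwood Mac", "Rumours")
#   # -> album dict (with "album" and "id" keys) or None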
async def get_cover_by_album_id(self, album_id: int, size: int = 640) -> Optional[str]:
"""Get cover URL by album ID. Retry login only on authentication failure."""
if size not in [80, 160, 320, 640, 1280]:
return None
album_id_str: str = str(album_id)
for attempt in range(2):
try:
metadata = await self.streamrip_client.get_metadata(
item_id=album_id_str, media_type="album"
)
break
except AttributeError:
await self.streamrip_client.login()
if attempt == 1:
return None
else:
return None
if not metadata:
return None
cover_id = metadata.get("cover")
if not cover_id:
return None
cover_url = self._get_tidal_cover_url(cover_id, size)
return cover_url
async def get_tracks_by_album_id(
self, album_id: int, quality: str = "FLAC"
@@ -247,7 +351,7 @@ class SRUtil:
]
return tracks_out
async def get_tracks_by_artist_song(self, artist: str, song: str, n: int = 0) -> Optional[list]:
"""Get track by artist and song name
Args:
artist (str): The name of the artist.
@@ -256,7 +360,23 @@ class SRUtil:
Optional[list]: Matching tracks, or [] if none found.
"""
if not self.streamrip_client.logged_in:
await self.streamrip_client.login()
try:
search_res = await self.streamrip_client.search(media_type="track",
query=f"{artist} - {song}",
)
logging.debug("Search result: %s", search_res)
return search_res[0].get("items", [])
except Exception as e:
traceback.print_exc()
logging.critical("Search Exception: %s", str(e))
if n < 3:
# Retry the search up to 3 times before giving up.
return await self.get_tracks_by_artist_song(artist, song, n + 1)
return []
async def get_stream_url_by_track_id(
self, track_id: int, quality: str = "FLAC"