misc
This commit is contained in:
@@ -7,20 +7,23 @@ import traceback
|
||||
import uuid
|
||||
import subprocess
|
||||
import shutil
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
from urllib.parse import urlparse, unquote
|
||||
import aiohttp
|
||||
from datetime import datetime
|
||||
from datetime import datetime, timezone
|
||||
from mediafile import MediaFile # type: ignore[import]
|
||||
from rq import get_current_job
|
||||
from utils.sr_wrapper import SRUtil
|
||||
from dotenv import load_dotenv
|
||||
import re
|
||||
|
||||
# ---------- Config ----------
|
||||
ROOT_DIR = Path("/storage/music2")
|
||||
MAX_RETRIES = 5
|
||||
THROTTLE_MIN = 1.0
|
||||
THROTTLE_MAX = 3.5
|
||||
DISCORD_WEBHOOK = os.getenv("TRIP_WEBHOOK_URI", "").strip()
|
||||
|
||||
HEADERS = {
|
||||
"User-Agent": (
|
||||
@@ -38,26 +41,71 @@ logging.basicConfig(
|
||||
format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
|
||||
)
|
||||
|
||||
load_dotenv()
|
||||
|
||||
sr = SRUtil()
|
||||
|
||||
|
||||
# ---------- Discord helper ----------
|
||||
async def discord_notify(webhook_url: str, title: str, description: str, target: Optional[str] = None, color: int = 0x00FF00):
|
||||
embed = {
|
||||
"title": title,
|
||||
"description": description[:1900] if description else "",
|
||||
"color": color,
|
||||
"timestamp": datetime.now(timezone.utc).isoformat(),
|
||||
}
|
||||
if target:
|
||||
embed["fields"] = [{"name": "Target", "value": str(target), "inline": True}]
|
||||
|
||||
payload = {
|
||||
"embeds": [embed],
|
||||
}
|
||||
|
||||
while True: # permanent retry
|
||||
try:
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.post(webhook_url, json=payload, timeout=aiohttp.ClientTimeout(total=10)) as resp:
|
||||
if resp.status >= 400:
|
||||
text = await resp.text()
|
||||
raise RuntimeError(f"Discord webhook failed ({resp.status}): {text}")
|
||||
break
|
||||
except Exception as e:
|
||||
print(f"Discord send failed, retrying: {e}")
|
||||
await asyncio.sleep(5)
|
||||
|
||||
def send_log_to_discord(message: str, level: str, target: Optional[str] = None):
|
||||
colors = {"WARNING": 0xFFA500, "ERROR": 0xFF0000, "CRITICAL": 0xFF0000}
|
||||
color = colors.get(level.upper(), 0xFFFF00)
|
||||
|
||||
async def _send():
|
||||
await discord_notify(
|
||||
webhook_url=DISCORD_WEBHOOK,
|
||||
title=f"{level} in bulk_download",
|
||||
description=message,
|
||||
target=target,
|
||||
color=color
|
||||
)
|
||||
|
||||
try:
|
||||
asyncio.get_running_loop()
|
||||
# already in an event loop — schedule a task
|
||||
asyncio.create_task(_send())
|
||||
except RuntimeError:
|
||||
# not in an event loop — safe to run
|
||||
asyncio.run(_send())
|
||||
|
||||
|
||||
# ---------- Helpers ----------
|
||||
|
||||
|
||||
def tag_with_mediafile(file_path: str, meta: dict):
|
||||
f = MediaFile(file_path)
|
||||
|
||||
# --- Helper to safely set textual/number fields ---
|
||||
def safe_set(attr, value, default=None, cast=None):
|
||||
if value is None:
|
||||
value = default
|
||||
if value is not None:
|
||||
if cast is not None:
|
||||
if cast:
|
||||
setattr(f, attr, cast(value))
|
||||
else:
|
||||
setattr(f, attr, str(value))
|
||||
|
||||
# --- Basic textual metadata ---
|
||||
safe_set("title", meta.get("title"), default="Unknown Title")
|
||||
safe_set("artist", meta.get("artist"), default="Unknown Artist")
|
||||
safe_set("albumartist", meta.get("album_artist"), default="Unknown Artist")
|
||||
@@ -66,8 +114,6 @@ def tag_with_mediafile(file_path: str, meta: dict):
|
||||
safe_set("disc", meta.get("disc_number"), default=0, cast=int)
|
||||
safe_set("isrc", meta.get("isrc"), default="")
|
||||
safe_set("bpm", meta.get("bpm"), default=0, cast=int)
|
||||
|
||||
# --- Release date ---
|
||||
release_date_str = meta.get("release_date")
|
||||
release_date_obj = None
|
||||
if release_date_str:
|
||||
@@ -75,34 +121,15 @@ def tag_with_mediafile(file_path: str, meta: dict):
|
||||
release_date_obj = datetime.fromisoformat(release_date_str).date()
|
||||
except ValueError:
|
||||
try:
|
||||
# fallback if only year string
|
||||
release_date_obj = datetime(int(release_date_str[:4]), 1, 1).date()
|
||||
except Exception:
|
||||
pass
|
||||
if release_date_obj:
|
||||
f.date = release_date_obj
|
||||
|
||||
# --- Save all tags ---
|
||||
f.save()
|
||||
|
||||
|
||||
def cleanup_empty_dirs(root: Path):
|
||||
"""
|
||||
Recursively remove any directories under root that contain no files
|
||||
(empty or only empty subdirectories).
|
||||
"""
|
||||
for dirpath, dirnames, filenames in os.walk(root, topdown=False):
|
||||
p = Path(dirpath)
|
||||
has_file = any(f.is_file() for f in p.rglob("*"))
|
||||
if not has_file:
|
||||
try:
|
||||
p.rmdir()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def sanitize_filename(name: str) -> str:
|
||||
"""Make a string safe for file/dir names."""
|
||||
if not name:
|
||||
return "Unknown"
|
||||
name = name.replace("/", "-").replace("\\", "-")
|
||||
@@ -113,18 +140,12 @@ def sanitize_filename(name: str) -> str:
|
||||
|
||||
|
||||
def ensure_unique_path(p: Path) -> Path:
|
||||
"""
|
||||
Ensure the given file or directory path is unique *within its parent folder*.
|
||||
Only appends (2), (3)... if a real conflict exists in that folder.
|
||||
"""
|
||||
parent = p.parent
|
||||
stem, suffix = p.stem, p.suffix
|
||||
existing = {f.name for f in parent.glob(f"*{suffix}") if f.is_file()}
|
||||
|
||||
candidate = f"{stem}{suffix}"
|
||||
if candidate not in existing:
|
||||
return parent / candidate
|
||||
|
||||
counter = 2
|
||||
while True:
|
||||
candidate = f"{stem} ({counter}){suffix}"
|
||||
@@ -133,7 +154,7 @@ def ensure_unique_path(p: Path) -> Path:
|
||||
counter += 1
|
||||
|
||||
|
||||
# ---------- Job ----------
|
||||
# ---------- bulk_download ----------
|
||||
def bulk_download(track_list: list, quality: str = "FLAC"):
|
||||
"""
|
||||
RQ job:
|
||||
@@ -142,9 +163,11 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
|
||||
- uses SR metadata to name/organize files
|
||||
- creates ONE tarball for all tracks
|
||||
- returns [tarball_path]
|
||||
- sends relevant messages to Discord
|
||||
"""
|
||||
job = get_current_job()
|
||||
job_id = job.id if job else uuid.uuid4().hex
|
||||
target = job.meta.get("target") if job else None
|
||||
staging_root = ROOT_DIR / job_id
|
||||
|
||||
if job:
|
||||
@@ -156,30 +179,27 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
|
||||
job.meta["status"] = "Started"
|
||||
job.save_meta()
|
||||
except Exception as e:
|
||||
logging.warning("Failed to init job.meta: %s", e)
|
||||
send_log_to_discord(f"Failed to init job.meta: {e}", "WARNING", target)
|
||||
|
||||
# Job started Discord message
|
||||
asyncio.run(discord_notify(
|
||||
DISCORD_WEBHOOK,
|
||||
title=f"Job Started: {job_id}",
|
||||
description=f"Processing `{len(track_list)}` track(s)",
|
||||
target=target,
|
||||
color=0x00FFFF
|
||||
))
|
||||
|
||||
async def process_tracks():
|
||||
per_track_meta = []
|
||||
all_final_files = []
|
||||
all_artists = set()
|
||||
|
||||
(ROOT_DIR / "completed").mkdir(parents=True, exist_ok=True)
|
||||
|
||||
async with aiohttp.ClientSession(headers=HEADERS) as session:
|
||||
total = len(track_list or [])
|
||||
logging.critical("Total tracks to process: %s", total)
|
||||
if job:
|
||||
job.meta["progress"] = 0
|
||||
job.save_meta()
|
||||
|
||||
for i, track_id in enumerate(track_list or []):
|
||||
track_info = {
|
||||
"track_id": str(track_id),
|
||||
"status": "Pending",
|
||||
"file_path": None,
|
||||
"error": None,
|
||||
"attempts": 0,
|
||||
}
|
||||
track_info = {"track_id": str(track_id), "status": "Pending", "file_path": None, "error": None, "attempts": 0}
|
||||
attempt = 0
|
||||
|
||||
while attempt < MAX_RETRIES:
|
||||
@@ -195,7 +215,6 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
|
||||
parsed = urlparse(url)
|
||||
clean_path = unquote(parsed.path)
|
||||
ext = Path(clean_path).suffix or ".mp3"
|
||||
|
||||
tmp_file = Path(f"/tmp/{uuid.uuid4().hex}{ext}")
|
||||
|
||||
async with session.get(url) as resp:
|
||||
@@ -205,7 +224,6 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
|
||||
f.write(chunk)
|
||||
|
||||
md = await sr.get_metadata_by_track_id(track_id) or {}
|
||||
logging.info("Metadata for %s: %s", track_id, md)
|
||||
artist_raw = md.get("artist") or "Unknown Artist"
|
||||
album_raw = md.get("album") or "Unknown Album"
|
||||
title_raw = md.get("title") or f"Track {track_id}"
|
||||
@@ -215,14 +233,11 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
|
||||
title = sanitize_filename(title_raw)
|
||||
|
||||
all_artists.add(artist)
|
||||
|
||||
artist_dir = staging_root / artist
|
||||
album_dir = artist_dir / album
|
||||
album_dir = staging_root / artist / album
|
||||
album_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
final_file = ensure_unique_path(album_dir / f"{title}{ext}")
|
||||
tag_with_mediafile(str(tmp_file), md)
|
||||
|
||||
tag_with_mediafile(str(tmp_file), md)
|
||||
tmp_file.rename(final_file)
|
||||
tmp_file = None
|
||||
|
||||
@@ -233,60 +248,48 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
|
||||
|
||||
if job:
|
||||
job.meta["progress"] = int(((i + 1) / total) * 100)
|
||||
job.meta["tracks"] = per_track_meta + [track_info]
|
||||
job.save_meta()
|
||||
break
|
||||
|
||||
except aiohttp.ClientResponseError as e:
|
||||
msg = f"Track {track_id} attempt {attempt} ClientResponseError: {e}"
|
||||
send_log_to_discord(msg, "WARNING", target)
|
||||
if e.status == 429:
|
||||
wait_time = min(60, 2**attempt) # exponential up to 60s
|
||||
logging.warning(
|
||||
"Rate limited (429). Sleeping %s seconds", wait_time
|
||||
)
|
||||
wait_time = min(60, 2**attempt)
|
||||
await asyncio.sleep(wait_time)
|
||||
else:
|
||||
await asyncio.sleep(
|
||||
random.uniform(THROTTLE_MIN, THROTTLE_MAX)
|
||||
)
|
||||
await asyncio.sleep(random.uniform(THROTTLE_MIN, THROTTLE_MAX))
|
||||
|
||||
except Exception as e:
|
||||
logging.error(
|
||||
"Track %s attempt %s failed: %s", track_id, attempt, e
|
||||
)
|
||||
traceback.print_exc()
|
||||
tb = traceback.format_exc()
|
||||
msg = f"Track {track_id} attempt {attempt} failed: {e}\n{tb}"
|
||||
send_log_to_discord(msg, "ERROR", target)
|
||||
track_info["error"] = str(e)
|
||||
if attempt >= MAX_RETRIES:
|
||||
track_info["status"] = "Failed"
|
||||
send_log_to_discord(f"Track {track_id} failed after {attempt} attempts", "ERROR", target)
|
||||
await asyncio.sleep(random.uniform(THROTTLE_MIN, THROTTLE_MAX))
|
||||
|
||||
finally:
|
||||
try:
|
||||
if tmp_file and tmp_file.exists():
|
||||
tmp_file.unlink()
|
||||
os.remove(tmp_file)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
per_track_meta.append(track_info)
|
||||
if job:
|
||||
try:
|
||||
job.meta["tracks"] = per_track_meta
|
||||
job.save_meta()
|
||||
except Exception as e:
|
||||
logging.warning(
|
||||
"Failed to update job.meta after track %s: %s", track_id, e
|
||||
)
|
||||
|
||||
await asyncio.sleep(random.uniform(THROTTLE_MIN, THROTTLE_MAX))
|
||||
|
||||
if not all_final_files:
|
||||
if job:
|
||||
try:
|
||||
job.meta["tarball"] = None
|
||||
job.meta["status"] = "Failed"
|
||||
job.save_meta()
|
||||
except Exception:
|
||||
pass
|
||||
job.meta["tarball"] = None
|
||||
job.meta["status"] = "Failed"
|
||||
job.save_meta()
|
||||
send_log_to_discord(f"No tracks were successfully downloaded for job `{job_id}`", "CRITICAL", target)
|
||||
return []
|
||||
|
||||
artist_counts: dict[str, int] = {}
|
||||
# Tarball creation
|
||||
artist_counts = {}
|
||||
for t in per_track_meta:
|
||||
if t["status"] == "Success" and t.get("file_path"):
|
||||
try:
|
||||
@@ -294,17 +297,10 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
|
||||
except Exception:
|
||||
artist = "Unknown Artist"
|
||||
artist_counts[artist] = artist_counts.get(artist, 0) + 1
|
||||
|
||||
if artist_counts:
|
||||
top_artist = sorted(artist_counts.items(), key=lambda kv: (-kv[1], kv[0]))[
|
||||
0
|
||||
][0]
|
||||
else:
|
||||
top_artist = "Unknown Artist"
|
||||
|
||||
top_artist = sorted(artist_counts.items(), key=lambda kv: (-kv[1], kv[0]))[0][0] if artist_counts else "Unknown Artist"
|
||||
combined_artist = sanitize_filename(top_artist)
|
||||
staged_tarball = staging_root / f"{combined_artist}.tar.gz"
|
||||
# Ensure uniqueness (Windows-style padding) within the parent folder
|
||||
|
||||
counter = 1
|
||||
base_name = staged_tarball.stem
|
||||
while staged_tarball.exists():
|
||||
@@ -315,69 +311,52 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
|
||||
final_tarball.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
if job:
|
||||
try:
|
||||
job.meta["status"] = "Compressing"
|
||||
job.save_meta()
|
||||
except Exception:
|
||||
pass
|
||||
job.meta["status"] = "Compressing"
|
||||
job.save_meta()
|
||||
|
||||
logging.info("Creating tarball: %s", staged_tarball)
|
||||
|
||||
def _create_tar_sync():
|
||||
try:
|
||||
subprocess.run(
|
||||
[
|
||||
"tar",
|
||||
"-I",
|
||||
"pigz -9",
|
||||
"-cf",
|
||||
str(staged_tarball),
|
||||
"-C",
|
||||
str(staging_root),
|
||||
]
|
||||
+ [str(f.relative_to(staging_root)) for f in all_final_files],
|
||||
check=True,
|
||||
)
|
||||
await discord_notify(DISCORD_WEBHOOK,
|
||||
title=f"Compressing: Job {job_id}",
|
||||
description=f"Creating tarball (`{len(track_list)}` track(s)).\nStaging path: {staged_tarball}",
|
||||
color=0xFFA500,
|
||||
target=target)
|
||||
try:
|
||||
subprocess.run(
|
||||
["tar", "-I", "pigz -9", "-cf", str(staged_tarball), "-C", str(staging_root)]
|
||||
+ [str(f.relative_to(staging_root)) for f in all_final_files],
|
||||
check=True,
|
||||
)
|
||||
for f in all_final_files:
|
||||
try:
|
||||
os.remove(f)
|
||||
except Exception:
|
||||
pass
|
||||
except FileNotFoundError:
|
||||
send_log_to_discord("pigz not available, falling back to tarfile (slower).", "WARNING", target)
|
||||
with tarfile.open(staged_tarball, "w:gz") as tar:
|
||||
for f in all_final_files:
|
||||
try:
|
||||
arcname = f.relative_to(staging_root)
|
||||
except ValueError:
|
||||
arcname = f.name
|
||||
tar.add(f, arcname=str(arcname))
|
||||
try:
|
||||
os.remove(f)
|
||||
except Exception:
|
||||
pass
|
||||
except FileNotFoundError:
|
||||
logging.warning("pigz not available, falling back to tarfile (slower).")
|
||||
with tarfile.open(staged_tarball, "w:gz") as tar:
|
||||
for f in all_final_files:
|
||||
try:
|
||||
arcname = f.relative_to(staging_root)
|
||||
except ValueError:
|
||||
arcname = f.name
|
||||
tar.add(f, arcname=str(arcname))
|
||||
try:
|
||||
os.remove(f)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
await asyncio.to_thread(_create_tar_sync)
|
||||
|
||||
if not staged_tarball.exists():
|
||||
logging.error("Tarball was not created: %s", staged_tarball)
|
||||
send_log_to_discord(f"Tarball was not created: `{staged_tarball}`", "CRITICAL", target)
|
||||
if job:
|
||||
try:
|
||||
job.meta["status"] = "compress_failed"
|
||||
job.save_meta()
|
||||
except Exception:
|
||||
pass
|
||||
job.meta["status"] = "compress_failed"
|
||||
job.save_meta()
|
||||
return []
|
||||
|
||||
logging.critical("Tarball created: %s", staged_tarball)
|
||||
|
||||
try:
|
||||
staged_tarball.rename(final_tarball)
|
||||
except Exception:
|
||||
shutil.move(str(staged_tarball), str(final_tarball))
|
||||
|
||||
logging.critical("Tarball finalized: %s", final_tarball)
|
||||
|
||||
await asyncio.to_thread(shutil.rmtree, staging_root, ignore_errors=True)
|
||||
|
||||
if job:
|
||||
@@ -386,6 +365,15 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
|
||||
job.meta["status"] = "Completed"
|
||||
job.save_meta()
|
||||
|
||||
# Job completed Discord message
|
||||
await discord_notify(
|
||||
DISCORD_WEBHOOK,
|
||||
title=f"Job Completed: {job_id}",
|
||||
description=f"Processed `{len(track_list)}` track(s). Tarball: `{final_tarball}`",
|
||||
target=target,
|
||||
color=0x00FF00
|
||||
)
|
||||
|
||||
return [str(final_tarball)]
|
||||
|
||||
loop = asyncio.new_event_loop()
|
||||
@@ -393,9 +381,9 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
|
||||
try:
|
||||
return loop.run_until_complete(process_tracks())
|
||||
except Exception as e:
|
||||
send_log_to_discord(f"bulk_download failed: {e}\n{traceback.format_exc()}", "CRITICAL", target)
|
||||
if job:
|
||||
job.meta["status"] = "Failed"
|
||||
job.save_meta()
|
||||
logging.critical("Exception: %s", str(e))
|
||||
finally:
|
||||
loop.close()
|
||||
|
Reference in New Issue
Block a user