api/utils/rip_background.py

import logging
import asyncio
import random
import os
import tarfile
import uuid
import shutil
from pathlib import Path
from urllib.parse import urlparse, unquote

import aiohttp
from rq import get_current_job

from utils.sr_wrapper import SRUtil

# Configure logging
logging.basicConfig(
    level=logging.DEBUG,
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
)

# Constants
ROOT_DIR = Path("/storage/music2") # Change to your music folder
MAX_RETRIES = 3
THROTTLE_MIN = 0.2
THROTTLE_MAX = 1.5
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/116.0.5845.97 Safari/537.36"
    ),
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.9",
    "Connection": "keep-alive",
}

# StreamRip utility
sr = SRUtil()
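# SRUtil (see utils/sr_wrapper.py) is assumed here to expose the async helpers
# used below: get_stream_url_by_track_id() and get_metadata_by_track_id().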


def bulk_download(track_list: list):
    """
    Full RQ-compatible bulk download job with:
    - async per-track URL fetching
    - retry on failure
    - per-track success/failure
    - metadata extraction
    - organized file storage
    - throttling
    - per-artist tarball creation
    - progress updates
    """
    job = get_current_job()

    async def process_tracks():
        per_track_meta = []
        artist_files = {}  # artist -> list of files

        async with aiohttp.ClientSession(headers=HEADERS) as session:
            total = len(track_list)
            logging.info("Total tracks to process: %s", total)

            for i, track_id in enumerate(track_list):
                track_info = {
                    "track_id": track_id,
                    "status": "pending",
                    "file_path": None,
                    "error": None,
                }
                attempt = 0
                while attempt < MAX_RETRIES:
                    attempt += 1
                    try:
                        # 1) Get track URL
                        url = await sr.get_stream_url_by_track_id(track_id)
                        if not url:
                            logging.error(
                                "Failed to get URL for track: %s", track_id
                            )
                            await asyncio.sleep(
                                random.uniform(THROTTLE_MIN, THROTTLE_MAX)
                            )
                            continue

                        # 2) Download file (chunked)
                        parsed = urlparse(url)
                        ext = Path(unquote(parsed.path)).suffix or ".mp3"
                        tmp_file = Path(f"/tmp/{track_id}{ext}")
                        async with session.get(url) as resp:
                            resp.raise_for_status()
                            with open(tmp_file, "wb") as f:
                                async for chunk in resp.content.iter_chunked(64 * 1024):
                                    f.write(chunk)

                        # 3) Extract metadata
                        metadata = await sr.get_metadata_by_track_id(track_id)
                        if not metadata:
                            logging.error(
                                "Failed to retrieve metadata for track ID: %s; retrying",
                                track_id,
                            )
                            continue
                        artist = metadata.get("artist", "Unknown Artist")
                        album = metadata.get("album", "Unknown Album")
                        title = metadata.get("song", "Unknown Song")
                        logging.info("Got metadata: %s/%s/%s", artist, album, title)

                        # 4) Organize path
                        final_dir = ROOT_DIR / artist / album
                        final_dir.mkdir(parents=True, exist_ok=True)
                        final_file = final_dir / f"{title}{ext}"
                        # shutil.move handles cross-device moves; Path.rename
                        # raises OSError if /tmp and ROOT_DIR sit on different
                        # filesystems
                        shutil.move(str(tmp_file), str(final_file))

                        # 5) Track per-track info
                        track_info.update(
                            {"status": "success", "file_path": str(final_file)}
                        )
                        artist_files.setdefault(artist, []).append(final_file)
                        break  # success
                    except Exception as e:
                        logging.error("Error downloading track %s: %s", track_id, e)
                        track_info["error"] = str(e)
                        if attempt >= MAX_RETRIES:
                            track_info["status"] = "failed"
                        else:
                            # small delay before retry
                            await asyncio.sleep(
                                random.uniform(THROTTLE_MIN, THROTTLE_MAX)
                            )

                if track_info["status"] == "pending":
                    # Retries were exhausted via the no-URL/no-metadata paths,
                    # which never raise; mark the track failed explicitly.
                    track_info["status"] = "failed"

                # 6) Update RQ job meta
                per_track_meta.append(track_info)
                if job:
                    job.meta["progress"] = int((i + 1) / total * 100)
                    job.meta["tracks"] = per_track_meta
                    job.save_meta()
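                # Clients can read this metadata while the job runs, e.g.
                # (hypothetical polling sketch; job_id and redis_conn are
                # assumed to be known to the caller):
                #   from rq.job import Job
                #   j = Job.fetch(job_id, connection=redis_conn)
                #   j.refresh()
                #   print(j.meta.get("progress"), j.meta.get("tracks"))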

                # 7) Throttle between downloads
                await asyncio.sleep(random.uniform(THROTTLE_MIN, THROTTLE_MAX))

        # 8) Create per-artist tarballs
        tarballs = []
        completed_dir = ROOT_DIR / "completed"
        # tarfile.open() does not create missing parent directories
        completed_dir.mkdir(parents=True, exist_ok=True)
        for artist, files in artist_files.items():
            short_id = uuid.uuid4().hex[:8]
            tarball_name = completed_dir / f"{artist}_{short_id}.tar.gz"
            with tarfile.open(tarball_name, "w:gz") as tar:
                for f in files:
                    tar.add(f, arcname=f.name)
                    os.remove(f)  # remove original file
            logging.info("Created tarball: %s", tarball_name)
            tarballs.append(str(tarball_name))
            artist_dir = ROOT_DIR / artist
            shutil.rmtree(artist_dir, ignore_errors=True)
        return tarballs

    # Run the async function synchronously (RQ executes job functions
    # synchronously, so we drive the coroutine on a dedicated event loop)
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        return loop.run_until_complete(process_tracks())
    finally:
        loop.close()
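
# A worker process must be running for this job to execute; with RQ's CLI that
# might look like the following (the queue name "downloads" is hypothetical):
#   rq worker downloads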