import logging
import asyncio
import random
import os
import tarfile
import uuid
import shutil
from pathlib import Path
from urllib.parse import urlparse, unquote

import aiohttp
from rq import get_current_job
from utils.sr_wrapper import SRUtil

# Configure logging
logging.basicConfig(
    level=logging.DEBUG,
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
)

# Constants
ROOT_DIR = Path("/storage/music2")  # Change to your music folder
MAX_RETRIES = 3
THROTTLE_MIN = 0.2
THROTTLE_MAX = 1.5

HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/116.0.5845.97 Safari/537.36"
    ),
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.9",
    "Connection": "keep-alive",
}

# StreamRip utility
sr = SRUtil()


def bulk_download(track_list: list):
    """
    Full RQ-compatible bulk download job with:
    - async per-track URL fetching
    - retry on failure
    - per-track success/failure
    - metadata extraction
    - organized file storage
    - throttling
    - per-artist tarball creation
    - progress updates
    """
    job = get_current_job()

    async def process_tracks():
        per_track_meta = []
        artist_files = {}  # artist -> list of files

        async with aiohttp.ClientSession(headers=HEADERS) as session:
            total = len(track_list)
            logging.critical("Total tracks to process: %s", total)

            for i, track_id in enumerate(track_list):
                track_info = {
                    "track_id": track_id,
                    "status": "pending",
                    "file_path": None,
                    "error": None,
                }
                attempt = 0

                while attempt < MAX_RETRIES:
                    attempt += 1
                    try:
                        # 1️⃣ Get track URL
                        url = await sr.get_stream_url_by_track_id(track_id)
                        if not url:
                            logging.critical(
                                "Failed to get URL for track: %s", track_id
                            )
                            # Record the failure so the track is not left "pending"
                            # if every attempt comes back without a URL
                            track_info["error"] = "no stream URL returned"
                            if attempt >= MAX_RETRIES:
                                track_info["status"] = "failed"
                            await asyncio.sleep(
                                random.uniform(THROTTLE_MIN, THROTTLE_MAX)
                            )
                            continue

                        # 2️⃣ Download file (chunked)
                        parsed = urlparse(url)
                        ext = Path(unquote(parsed.path)).suffix or ".mp3"
                        tmp_file = Path(f"/tmp/{track_id}{ext}")

                        async with session.get(url) as resp:
                            resp.raise_for_status()
                            with open(tmp_file, "wb") as f:
                                async for chunk in resp.content.iter_chunked(64 * 1024):
                                    f.write(chunk)

                        # 3️⃣ Extract metadata
                        metadata = await sr.get_metadata_by_track_id(track_id)
                        if not metadata:
                            logging.critical(
                                "Failed to retrieve metadata for track ID: %s. Skipping",
                                track_id,
                            )
                            # Clean up the downloaded temp file and record the
                            # failure before retrying or giving up
                            tmp_file.unlink(missing_ok=True)
                            track_info["error"] = "metadata lookup failed"
                            if attempt >= MAX_RETRIES:
                                track_info["status"] = "failed"
                            continue
                        artist = metadata.get("artist", "Unknown Artist")
                        album = metadata.get("album", "Unknown Album")
                        title = metadata.get("song", "Unknown Song")

                        logging.critical("Got metadata: %s/%s/%s", artist, album, title)

                        # 4️⃣ Organize path
                        final_dir = ROOT_DIR / artist / album
                        final_dir.mkdir(parents=True, exist_ok=True)
                        final_file = final_dir / f"{title}{ext}"
                        # Use shutil.move rather than Path.rename: /tmp and ROOT_DIR
                        # may sit on different filesystems, where rename() raises OSError
                        shutil.move(str(tmp_file), str(final_file))

                        # 5️⃣ Track per-track info
                        track_info.update(
                            {"status": "success", "file_path": str(final_file)}
                        )
                        artist_files.setdefault(artist, []).append(final_file)

                        break  # success

                    except Exception as e:
                        logging.error("Error downloading track %s: %s", track_id, e)
                        track_info["error"] = str(e)
                        if attempt >= MAX_RETRIES:
                            track_info["status"] = "failed"
                        else:
                            # small delay before retry
                            await asyncio.sleep(
                                random.uniform(THROTTLE_MIN, THROTTLE_MAX)
                            )

                # 6️⃣ Update RQ job meta
                per_track_meta.append(track_info)
                if job:
                    job.meta["progress"] = int((i + 1) / total * 100)
                    job.meta["tracks"] = per_track_meta
                    job.save_meta()

                # 7️⃣ Throttle between downloads
                await asyncio.sleep(random.uniform(THROTTLE_MIN, THROTTLE_MAX))

        # 8️⃣ Create per-artist tarballs
        tarballs = []
        completed_dir = ROOT_DIR / "completed"
        completed_dir.mkdir(parents=True, exist_ok=True)  # tarfile.open fails if missing
        for artist, files in artist_files.items():
            short_id = uuid.uuid4().hex[:8]
            tarball_name = completed_dir / f"{artist}_{short_id}.tar.gz"
            with tarfile.open(tarball_name, "w:gz") as tar:
                for f in files:
                    tar.add(f, arcname=f.name)
                    os.remove(f)  # remove original file
            logging.critical("Created tarball: %s", tarball_name)
            tarballs.append(str(tarball_name))
            artist_dir = ROOT_DIR / artist
            shutil.rmtree(artist_dir, ignore_errors=True)

        return tarballs

    # Run the async function synchronously
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        return loop.run_until_complete(process_tracks())
    finally:
        loop.close()
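
# --- Usage sketch (illustrative, not part of the job) -----------------------
# A minimal example of how this job might be enqueued and monitored, assuming
# a local Redis instance and an RQ worker listening on the "default" queue.
# The queue name, timeout, and track IDs below are placeholders.
#
#   from redis import Redis
#   from rq import Queue
#   from rq.job import Job
#
#   redis_conn = Redis()
#   queue = Queue("default", connection=redis_conn)
#   job = queue.enqueue(bulk_download, ["track-id-1", "track-id-2"], job_timeout=3600)
#
#   # Later, read back the progress this job publishes via job.save_meta():
#   j = Job.fetch(job.id, connection=redis_conn)
#   j.refresh()
#   print(j.meta.get("progress"), j.meta.get("tracks"))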