import logging
import asyncio
import random
import os
import tarfile
import uuid
import shutil
from pathlib import Path
from urllib.parse import urlparse, unquote

import aiohttp
from rq import get_current_job

from utils.sr_wrapper import SRUtil

# Configure logging
logging.basicConfig(
    level=logging.DEBUG,
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
)

# Constants
ROOT_DIR = Path("/storage/music2")  # Change to your music folder
MAX_RETRIES = 3
THROTTLE_MIN = 0.2
THROTTLE_MAX = 1.5
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/116.0.5845.97 Safari/537.36"
    ),
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.9",
    "Connection": "keep-alive",
}

# StreamRip utility
sr = SRUtil()


def bulk_download(track_list: list):
    """
    Full RQ-compatible bulk download job with:
      - async per-track URL fetching
      - retry on failure
      - per-track success/failure tracking
      - metadata extraction
      - organized file storage
      - throttling
      - per-artist tarball creation
      - progress updates
    """
    job = get_current_job()

    async def process_tracks():
        per_track_meta = []
        artist_files = {}  # artist -> list of files

        async with aiohttp.ClientSession(headers=HEADERS) as session:
            total = len(track_list)
            logging.critical("Total tracks to process: %s", total)

            for i, track_id in enumerate(track_list):
                track_info = {
                    "track_id": track_id,
                    "status": "pending",
                    "file_path": None,
                    "error": None,
                }

                attempt = 0
                while attempt < MAX_RETRIES:
                    attempt += 1
                    try:
                        # 1️⃣ Get track URL
                        url = await sr.get_stream_url_by_track_id(track_id)
                        if not url:
                            logging.critical(
                                "Failed to get URL for track: %s", track_id
                            )
                            await asyncio.sleep(
                                random.uniform(THROTTLE_MIN, THROTTLE_MAX)
                            )
                            continue

                        # 2️⃣ Download file (chunked)
                        parsed = urlparse(url)
                        ext = Path(unquote(parsed.path)).suffix or ".mp3"
                        tmp_file = Path(f"/tmp/{track_id}{ext}")
                        async with session.get(url) as resp:
                            resp.raise_for_status()
                            with open(tmp_file, "wb") as f:
                                async for chunk in resp.content.iter_chunked(64 * 1024):
                                    f.write(chunk)

                        # 3️⃣ Extract metadata
                        metadata = await sr.get_metadata_by_track_id(track_id)
                        if not metadata:
                            logging.critical(
                                "Failed to retrieve metadata for track ID: %s. Skipping",
                                track_id,
                            )
                            continue

                        artist = metadata.get("artist", "Unknown Artist")
                        album = metadata.get("album", "Unknown Album")
                        title = metadata.get("song", "Unknown Song")
                        logging.critical("Got metadata: %s/%s/%s", artist, album, title)

                        # 4️⃣ Organize path
                        final_dir = ROOT_DIR / artist / album
                        final_dir.mkdir(parents=True, exist_ok=True)
                        final_file = final_dir / f"{title}{ext}"
                        # shutil.move handles the case where /tmp and ROOT_DIR
                        # live on different filesystems, which Path.rename does not.
                        shutil.move(str(tmp_file), str(final_file))

                        # 5️⃣ Track per-track info
                        track_info.update(
                            {"status": "success", "file_path": str(final_file)}
                        )
                        artist_files.setdefault(artist, []).append(final_file)
                        break  # success

                    except Exception as e:
                        logging.error("Error downloading track %s: %s", track_id, e)
                        track_info["error"] = str(e)
                        if attempt >= MAX_RETRIES:
                            track_info["status"] = "failed"
                        else:
                            # small delay before retry
                            await asyncio.sleep(
                                random.uniform(THROTTLE_MIN, THROTTLE_MAX)
                            )

                # 6️⃣ Update RQ job meta
                per_track_meta.append(track_info)
                if job:
                    job.meta["progress"] = int((i + 1) / total * 100)
                    job.meta["tracks"] = per_track_meta
                    job.save_meta()

                # 7️⃣ Throttle between downloads
                await asyncio.sleep(random.uniform(THROTTLE_MIN, THROTTLE_MAX))

        # 8️⃣ Create per-artist tarballs
        tarballs = []
        completed_dir = ROOT_DIR / "completed"
        completed_dir.mkdir(parents=True, exist_ok=True)  # tarfile.open does not create parent dirs
        for artist, files in artist_files.items():
            short_id = uuid.uuid4().hex[:8]
            tarball_name = completed_dir / f"{artist}_{short_id}.tar.gz"
            with tarfile.open(tarball_name, "w:gz") as tar:
                for f in files:
                    tar.add(f, arcname=f.name)
                    os.remove(f)  # remove original file
            logging.critical("Created tarball: %s", tarball_name)
            tarballs.append(str(tarball_name))
            artist_dir = ROOT_DIR / artist
            shutil.rmtree(artist_dir, ignore_errors=True)

        return tarballs

    # Run the async function synchronously
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        return loop.run_until_complete(process_tracks())
    finally:
        loop.close()