import logging
import asyncio
import random
import os
import tarfile
import uuid
import re
import shutil
from pathlib import Path
from urllib.parse import urlparse, unquote

import aiohttp
from rq import get_current_job

from utils.sr_wrapper import SRUtil

# Configure logging
logging.basicConfig(
    level=logging.DEBUG,
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
)

# Constants
ROOT_DIR = Path("/storage/music2")  # Change to your music folder
MAX_RETRIES = 3
THROTTLE_MIN = 0.2
THROTTLE_MAX = 1.5
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/116.0.5845.97 Safari/537.36"
    ),
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.9",
    "Connection": "keep-alive",
}

# StreamRip utility
sr = SRUtil()


def sanitize_filename(name: str) -> str:
    """
    Remove or replace characters not allowed in filenames.
    Also trims whitespace and collapses consecutive spaces.
    """
    # Replace slashes/backslashes with a dash
    name = name.replace("/", "-").replace("\\", "-")
    # Remove illegal characters for most OSes
    name = re.sub(r'[<>:"|?*\x00-\x1F]', "", name)
    # Strip leading/trailing spaces and dots
    name = name.strip().strip(".")
    # Collapse multiple spaces into one
    name = re.sub(r"\s+", " ", name)
    return name or "Unknown"


def bulk_download(track_list: list):
    """
    Full RQ-compatible bulk download job with:
      - async per-track URL fetching
      - retry on failure
      - per-track success/failure
      - metadata extraction
      - organized file storage
      - throttling
      - per-artist tarball creation
      - progress updates
    """
    job = get_current_job()

    async def process_tracks():
        per_track_meta = []
        artist_files = {}  # artist -> list of files

        async with aiohttp.ClientSession(headers=HEADERS) as session:
            total = len(track_list)
            logging.critical("Total tracks to process: %s", total)

            for i, track_id in enumerate(track_list):
                track_info = {
                    "track_id": track_id,
                    "status": "pending",
                    "file_path": None,
                    "error": None,
                }
                attempt = 0
                while attempt < MAX_RETRIES:
                    attempt += 1
                    try:
                        # Get track URL
                        url = await sr.get_stream_url_by_track_id(track_id)
                        if not url:
                            logging.critical(
                                "Failed to get URL for track: %s", track_id
                            )
                            await asyncio.sleep(
                                random.uniform(THROTTLE_MIN, THROTTLE_MAX)
                            )
                            continue

                        # Download file (chunked)
                        parsed = urlparse(url)
                        ext = Path(unquote(parsed.path)).suffix or ".mp3"
                        tmp_file = Path(f"/tmp/{track_id}{ext}")
                        async with session.get(url) as resp:
                            resp.raise_for_status()
                            with open(tmp_file, "wb") as f:
                                async for chunk in resp.content.iter_chunked(64 * 1024):
                                    f.write(chunk)

                        # Extract metadata
                        metadata = await sr.get_metadata_by_track_id(track_id)
                        if not metadata:
                            logging.critical(
                                "Failed to retrieve metadata for track ID: %s. Skipping",
                                track_id,
                            )
                            continue

                        artist = sanitize_filename(metadata.get("artist", "Unknown Artist"))
                        album = sanitize_filename(metadata.get("album", "Unknown Album"))
                        title = sanitize_filename(metadata.get("song", "Unknown Song"))
                        logging.critical("Got metadata: %s/%s/%s", artist, album, title)

                        # Organize path
                        final_dir = ROOT_DIR / artist / album
                        final_dir.mkdir(parents=True, exist_ok=True)
                        final_file = final_dir / f"{title}{ext}"
                        # shutil.move works across filesystems (Path.rename fails
                        # when /tmp and ROOT_DIR are on different devices)
                        shutil.move(tmp_file, final_file)

                        # Track per-track info
                        track_info.update(
                            {"status": "success", "file_path": str(final_file)}
                        )
                        artist_files.setdefault(artist, []).append(final_file)
                        break  # success

                    except Exception as e:
                        logging.error("Error downloading track %s: %s", track_id, e)
                        track_info["error"] = str(e)
                        if attempt >= MAX_RETRIES:
                            track_info["status"] = "failed"
                        else:
                            # small delay before retry
                            await asyncio.sleep(
                                random.uniform(THROTTLE_MIN, THROTTLE_MAX)
                            )

                # Update RQ job meta
                per_track_meta.append(track_info)
                if job:
                    job.meta["progress"] = int((i + 1) / total * 100)
                    job.meta["tracks"] = track_list
                    job.save_meta()

                # Throttle between downloads
                await asyncio.sleep(random.uniform(THROTTLE_MIN, THROTTLE_MAX))

        # Create per-artist tarballs
        tarballs = []
        completed_dir = ROOT_DIR / "completed"
        completed_dir.mkdir(parents=True, exist_ok=True)  # ensure output dir exists
        for artist, files in artist_files.items():
            short_id = uuid.uuid4().hex[:8]
            tarball_name = completed_dir / f"{artist}_{short_id}.tar.gz"
            with tarfile.open(tarball_name, "w:gz") as tar:
                for f in files:
                    tar.add(f, arcname=f.name)
                    os.remove(f)  # remove original file
            logging.critical("Created tarball: %s", tarball_name)
            tarballs.append(str(tarball_name))
            artist_dir = ROOT_DIR / artist
            shutil.rmtree(artist_dir, ignore_errors=True)

        return tarballs

    # Run the async function synchronously
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        return loop.run_until_complete(process_tracks())
    finally:
        loop.close()