Add bulk video download functionality

- Implemented `bulk_video_download` function to handle video downloads, including metadata fetching, HLS stream handling, and tarball creation.
- Enhanced `bulk_download` function in `rip_background.py` to improve error logging with formatted track descriptions.
- Added video search and metadata retrieval methods in `sr_wrapper.py` for better integration with Tidal's video API.
- Updated Tidal client credentials.
This commit is contained in:
2026-02-18 13:38:26 -05:00
parent 9d16c96490
commit d6689b9c38
4 changed files with 1257 additions and 81 deletions

View File

@@ -320,6 +320,15 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
}
attempt = 0
def _track_desc() -> str:
    """Human-readable track description used in Discord/console log lines."""
    title = track_info.get("title") or f"Track {track_id}"
    artist = track_info.get("artist") or "Unknown"
    album = track_info.get("album") or ""
    base = f"{track_id} - '{title}' by {artist}"
    return f"{base} [{album}]" if album else base
# Fetch metadata FIRST to check if track is available before attempting download
md = None
try:
@@ -327,6 +336,12 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
md = await sr.get_metadata_by_track_id(track_id) or {}
print(f"DEBUG: Metadata fetched: {bool(md)}")
# Populate track_info immediately so failure logs have useful info
if md:
track_info["title"] = md.get("title") or f"Track {track_id}"
track_info["artist"] = md.get("artist") or "Unknown Artist"
track_info["album"] = md.get("album") or "Unknown Album"
# Check if track is streamable
if md and not md.get("streamable", True):
print(f"TRACK {track_id}: Not streamable, skipping")
@@ -564,7 +579,7 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
break
except aiohttp.ClientResponseError as e:
msg = f"Track {track_id} attempt {attempt} ClientResponseError: {e}"
msg = f"Track {_track_desc()} attempt {attempt} ClientResponseError: {e}"
send_log_to_discord(msg, "WARNING", target)
# If 429, backoff as before. If 5xx, recreate session and refresh Tidal client.
if getattr(e, "status", None) == 429:
@@ -581,7 +596,7 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
try:
await sr._force_fresh_login()
send_log_to_discord(
f"Refreshed Tidal session after 5xx error on track {track_id}",
f"Refreshed Tidal session after 5xx error on track {_track_desc()}",
"WARNING",
target,
)
@@ -625,7 +640,7 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
if is_not_found:
# Permanent failure - do not retry
msg = (
f"Track {track_id} not found/unavailable, skipping: {e}"
f"Track {_track_desc()} not found/unavailable, skipping: {e}"
)
print(msg)
send_log_to_discord(msg, "WARNING", target)
@@ -634,7 +649,7 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
break # Exit retry loop immediately
elif is_5xx_error:
msg = (
f"Track {track_id} attempt {attempt} server error: {e}"
f"Track {_track_desc()} attempt {attempt} server error: {e}"
)
send_log_to_discord(msg, "WARNING", target)
track_info["error"] = err_str
@@ -648,7 +663,7 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
try:
await sr._force_fresh_login()
send_log_to_discord(
f"Refreshed Tidal session after 5xx error on track {track_id}",
f"Refreshed Tidal session after 5xx error on track {_track_desc()}",
"WARNING",
target,
)
@@ -661,7 +676,7 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
if attempt >= MAX_RETRIES:
track_info["status"] = "Failed"
send_log_to_discord(
f"Track {track_id} failed after {attempt} attempts (5xx)",
f"Track {_track_desc()} failed after {attempt} attempts (5xx)",
"ERROR",
target,
)
@@ -670,13 +685,13 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
)
elif is_no_stream_url:
if attempt == 1 or attempt == MAX_RETRIES:
msg = f"Track {track_id} attempt {attempt} failed: {e}\n{tb}"
msg = f"Track {_track_desc()} attempt {attempt} failed: {e}\n{tb}"
send_log_to_discord(msg, "ERROR", target)
track_info["error"] = str(e)
if attempt >= MAX_RETRIES:
track_info["status"] = "Failed"
send_log_to_discord(
f"Track {track_id} failed after {attempt} attempts",
f"Track {_track_desc()} failed after {attempt} attempts",
"ERROR",
target,
)
@@ -685,14 +700,14 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
)
else:
msg = (
f"Track {track_id} attempt {attempt} failed: {e}\n{tb}"
f"Track {_track_desc()} attempt {attempt} failed: {e}\n{tb}"
)
send_log_to_discord(msg, "ERROR", target)
track_info["error"] = str(e)
if attempt >= MAX_RETRIES:
track_info["status"] = "Failed"
send_log_to_discord(
f"Track {track_id} failed after {attempt} attempts",
f"Track {_track_desc()} failed after {attempt} attempts",
"ERROR",
target,
)
@@ -885,11 +900,367 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
loop.close()
# Correct integration of FLAC stream check
async def process_tracks(track_list):
    """Verify each track's combined segment contains a FLAC stream before decoding.

    Tracks whose combined file has no FLAC stream are logged and skipped.
    Fix: the original ``enumerate`` bound ``i`` and ``track_id`` but never
    used either; iterate without the dead bindings.
    """
    for _ in track_list or []:
        # NOTE(review): example/staging path — presumably replaced by the real
        # merged-segment path from the download pipeline; confirm upstream.
        combined_path = f"/tmp/{uuid.uuid4().hex}_combined.m4s"
        if not await check_flac_stream(combined_path):
            logger.error(f"No FLAC stream found in {combined_path}. Skipping file.")
            continue
        # Proceed with decoding pipeline
# ---------- bulk_video_download ----------
def bulk_video_download(video_list: list):
    """
    RQ job for bulk video downloads:
      - fetches video metadata and HLS streams
      - downloads with ffmpeg in highest quality
      - creates ONE tarball for all videos
      - returns [tarball_path]
      - sends relevant messages to Discord

    Fixes over the initial implementation:
      - tar/pigz fallback also catches CalledProcessError (tar exits non-zero
        when pigz is missing; FileNotFoundError only fires when tar itself is
        absent), so the tarfile fallback actually runs.
      - collision-renamed tarballs no longer embed a stray ".tar" (Path.stem
        of "X.tar.gz" is "X.tar").
      - per-attempt temp dirs under /tmp are cleaned up on failure, not only
        on success.
    """
    job = get_current_job()
    job_id = job.id if job else uuid.uuid4().hex
    target = job.meta.get("target") if job else None
    staging_root = ROOT_DIR / f"video_{job_id}"

    if job:
        try:
            job.meta["video_ids"] = [str(v) for v in (video_list or [])]
            job.meta["videos"] = []
            job.meta["progress"] = 0
            job.meta["tarball"] = None
            job.meta["status"] = "Started"
            job.save_meta()
        except Exception as e:
            send_log_to_discord(f"Failed to init job.meta: {e}", "WARNING", target)

    # Job started Discord message
    asyncio.run(
        discord_notify(
            DISCORD_WEBHOOK,
            title=f"Video Job Started: {job_id}",
            description=f"Processing `{len(video_list)}` video(s)",
            target=target,
            color=0x00FFFF,
        )
    )

    async def process_videos(video_list):
        """Download every video, then tar the results; returns [tarball_path] or []."""
        per_video_meta = []
        all_final_files = []
        all_artists = set()
        (ROOT_DIR / "completed").mkdir(parents=True, exist_ok=True)
        total = len(video_list or [])

        for i, video_id in enumerate(video_list or []):
            print(f"DEBUG: Processing video {i + 1}/{total}: {video_id}")
            video_info = {
                "video_id": str(video_id),
                "title": None,
                "artist": None,
                "status": "Pending",
                "file_path": None,
                "filename": None,
                "error": None,
                "attempts": 0,
            }
            attempt = 0

            def _video_desc() -> str:
                """Format video info for log messages."""
                title = video_info.get("title") or f"Video {video_id}"
                artist = video_info.get("artist") or "Unknown"
                return f"{video_id} - '{title}' by {artist}"

            # Fetch metadata first so failure logs carry title/artist.
            md = None
            try:
                print(f"DEBUG: Fetching metadata for video {video_id}")
                md = await sr.get_video_metadata(video_id)
                print(f"DEBUG: Metadata fetched: {bool(md)}")
                if md:
                    video_info["title"] = md.get("title") or f"Video {video_id}"
                    video_info["artist"] = md.get("artist") or "Unknown Artist"
            except Exception as meta_err:
                print(f"VIDEO {video_id}: Metadata fetch failed: {meta_err}")
                md = None

            while attempt < MAX_RETRIES:
                attempt += 1
                video_info["attempts"] = attempt
                tmp_dir = None  # set before try so the except can clean it up
                try:
                    # sr.download_video handles HLS and quality selection.
                    print(f"VIDEO {video_id}: Starting download (attempt {attempt})")
                    tmp_dir = Path(f"/tmp/video_{uuid.uuid4().hex}")
                    tmp_dir.mkdir(parents=True, exist_ok=True)
                    # Cap each video download at 30 minutes.
                    try:
                        file_path = await asyncio.wait_for(
                            sr.download_video(video_id, str(tmp_dir)),
                            timeout=1800,  # 30 minutes
                        )
                    except asyncio.TimeoutError:
                        print(f"VIDEO {video_id}: Download timed out after 30 minutes")
                        raise RuntimeError("Download timed out after 30 minutes")
                    if not file_path or not Path(file_path).exists():
                        raise RuntimeError("Download completed but no file created")

                    # If we didn't get metadata earlier, try again.
                    if not md:
                        try:
                            md = await sr.get_video_metadata(video_id)
                        except Exception:
                            md = {}
                    md = md or {}
                    artist_raw = md.get("artist") or "Unknown Artist"
                    title_raw = md.get("title") or f"Video {video_id}"
                    artist = sanitize_filename(artist_raw)
                    title = sanitize_filename(title_raw)
                    video_info["title"] = title
                    video_info["artist"] = artist
                    print(f"VIDEO {video_id}: Processing '{title}' by {artist}")
                    all_artists.add(artist)
                    video_dir = staging_root / artist
                    video_dir.mkdir(parents=True, exist_ok=True)
                    final_file = ensure_unique_path(video_dir / f"{title}.mp4")

                    # Move to final location, then drop the temp dir.
                    print(f"VIDEO {video_id}: Moving to final location...")
                    shutil.move(str(file_path), str(final_file))
                    shutil.rmtree(tmp_dir, ignore_errors=True)
                    print(f"VIDEO {video_id}: File moved successfully")

                    # Success bookkeeping.
                    video_info["status"] = "Success"
                    video_info["file_path"] = str(final_file)
                    try:
                        video_info["filename"] = final_file.name
                    except Exception:
                        video_info["filename"] = None
                    video_info["error"] = None
                    all_final_files.append(final_file)
                    print(
                        f"VIDEO {video_id}: SUCCESS! Progress: {((i + 1) / total) * 100:.0f}%"
                    )
                    # Throttle between videos to avoid hammering the API.
                    await asyncio.sleep(random.uniform(THROTTLE_MIN, THROTTLE_MAX))
                    if job:
                        job.meta["progress"] = int(((i + 1) / total) * 100)
                        job.meta["videos"] = per_video_meta + [video_info]
                        job.save_meta()
                    break
                except Exception as e:
                    tb = traceback.format_exc()
                    err_str = str(e).lower()
                    # Don't leak the per-attempt staging dir on failure.
                    if tmp_dir is not None:
                        shutil.rmtree(tmp_dir, ignore_errors=True)
                    is_not_found = any(
                        phrase in err_str
                        for phrase in (
                            "video not found",
                            "not found",
                            "404",
                            "does not exist",
                            "no longer available",
                        )
                    )
                    if is_not_found:
                        # Permanent failure - do not retry.
                        msg = f"Video {_video_desc()} not found/unavailable, skipping: {e}"
                        print(msg)
                        send_log_to_discord(msg, "WARNING", target)
                        video_info["status"] = "Failed"
                        video_info["error"] = str(e)
                        break
                    else:
                        msg = f"Video {_video_desc()} attempt {attempt} failed: {e}\n{tb}"
                        send_log_to_discord(msg, "ERROR", target)
                        video_info["error"] = str(e)
                        if attempt >= MAX_RETRIES:
                            video_info["status"] = "Failed"
                            send_log_to_discord(
                                f"Video {_video_desc()} failed after {attempt} attempts",
                                "ERROR",
                                target,
                            )
                        await asyncio.sleep(random.uniform(THROTTLE_MIN, THROTTLE_MAX))

            # Ensure placeholders for job metadata
            video_info["title"] = video_info.get("title") or f"Video {video_id}"
            video_info["artist"] = video_info.get("artist") or "Unknown Artist"
            if video_info.get("file_path") and not video_info.get("filename"):
                try:
                    video_info["filename"] = Path(video_info["file_path"]).name
                except Exception:
                    video_info["filename"] = None
            per_video_meta.append(video_info)

        if not all_final_files:
            if job:
                job.meta["tarball"] = None
                job.meta["status"] = "Failed"
                job.save_meta()
            send_log_to_discord(
                f"No videos were successfully downloaded for job `{job_id}`",
                "CRITICAL",
                target,
            )
            return []

        # Tarball creation: name after the job target if present, else the
        # artist with the most successful videos.
        artist_counts = {}
        for v in per_video_meta:
            if v["status"] == "Success" and v.get("file_path"):
                try:
                    artist = Path(v["file_path"]).relative_to(staging_root).parts[0]
                except Exception:
                    artist = "Unknown Artist"
                artist_counts[artist] = artist_counts.get(artist, 0) + 1
        top_artist = (
            sorted(artist_counts.items(), key=lambda kv: (-kv[1], kv[0]))[0][0]
            if artist_counts
            else "Unknown Artist"
        )
        target_name = None
        try:
            if job and job.meta:
                target_name = job.meta.get("target")
        except Exception:
            target_name = None
        base_label = (
            sanitize_filename(target_name)
            if target_name
            else sanitize_filename(top_artist)
        )
        staged_tarball = staging_root / f"{base_label}_videos.tar.gz"
        counter = 1
        # .stem on "X.tar.gz" is "X.tar"; strip the full double suffix so
        # renamed duplicates read "X_videos (2).tar.gz".
        base_name = staged_tarball.name[: -len(".tar.gz")]
        while staged_tarball.exists():
            counter += 1
            staged_tarball = staging_root / f"{base_name} ({counter}).tar.gz"
        final_dir = Path("/storage/music/TRIP/videos")
        final_dir.mkdir(parents=True, exist_ok=True)
        final_tarball = ensure_unique_filename_in_dir(final_dir, staged_tarball.name)

        if job:
            job.meta["status"] = "Compressing"
            job.save_meta()
        logging.info("Creating video tarball: %s", staged_tarball)
        await discord_notify(
            DISCORD_WEBHOOK,
            title=f"Compressing: Video Job {job_id}",
            description=f"Creating tarball: `{len(all_final_files)}` video(s).\nStaging path: {staged_tarball}",
            color=0xFFA500,
            target=target,
        )
        try:
            subprocess.run(
                [
                    "tar",
                    "-I",
                    "pigz -9",
                    "-cf",
                    str(staged_tarball),
                    "-C",
                    str(staging_root),
                ]
                + [str(f.relative_to(staging_root)) for f in all_final_files],
                check=True,
            )
            for f in all_final_files:
                try:
                    os.remove(f)
                except Exception:
                    pass
        except (FileNotFoundError, subprocess.CalledProcessError):
            # FileNotFoundError: tar binary missing. CalledProcessError: tar
            # ran but failed (typically "pigz: command not found"). Either
            # way, fall back to pure-Python tarfile.
            send_log_to_discord(
                "pigz not available, falling back to tarfile (slower).",
                "WARNING",
                target,
            )
            with tarfile.open(staged_tarball, "w:gz") as tar:
                for f in all_final_files:
                    try:
                        arcname = f.relative_to(staging_root)
                    except ValueError:
                        arcname = f.name
                    tar.add(f, arcname=str(arcname))
                    try:
                        os.remove(f)
                    except Exception:
                        pass
        except Exception as e:
            send_log_to_discord(f"Video tar creation failed: {e}", "ERROR", target)
            if job:
                job.meta["status"] = "compress_failed"
                job.save_meta()
            return []

        if not staged_tarball.exists():
            send_log_to_discord(
                f"Video tarball was not created: `{staged_tarball}`", "CRITICAL", target
            )
            if job:
                job.meta["status"] = "compress_failed"
                job.save_meta()
            return []

        try:
            staged_tarball.rename(final_tarball)
        except Exception:
            # rename fails across filesystems; shutil.move copies instead.
            shutil.move(str(staged_tarball), str(final_tarball))
        await asyncio.to_thread(shutil.rmtree, staging_root, ignore_errors=True)

        if job:
            job.meta["tarball"] = str(final_tarball)
            job.meta["progress"] = 100
            job.meta["status"] = "Completed"
            job.save_meta()

        # Job completed Discord message
        completed = len(all_final_files)
        failed = len(video_list) - completed
        await discord_notify(
            DISCORD_WEBHOOK,
            title=f"Video Job Completed: {job_id}",
            description=f"Processed `{len(video_list)}` video(s).\nCompleted: `{completed}`\nFailed: `{failed}`\nTarball: `{final_tarball}`",
            target=target,
            color=0x00FF00,
        )
        logging.info("Video job %s finished, tarball: %s", job_id, final_tarball)
        return [str(final_tarball)]

    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        return loop.run_until_complete(process_videos(video_list))
    except Exception as e:
        send_log_to_discord(
            f"bulk_video_download failed: {e}\n{traceback.format_exc()}", "CRITICAL", target
        )
        if job:
            job.meta["status"] = "Failed"
            job.save_meta()
    finally:
        loop.close()