Add bulk video download functionality

- Implemented `bulk_video_download` function to handle video downloads, including metadata fetching, HLS stream handling, and tarball creation.
- Enhanced `bulk_download` function in `rip_background.py` to improve error logging with formatted track descriptions.
- Added video search and metadata retrieval methods in `sr_wrapper.py` for better integration with Tidal's video API.
- Updated Tidal client credentials.
2026-02-18 13:38:26 -05:00
parent 9d16c96490
commit d6689b9c38
4 changed files with 1257 additions and 81 deletions
--- a/utils/sr_wrapper.py
+++ b/utils/sr_wrapper.py
@@ -12,12 +12,19 @@ import json
 import os
 import aiohttp
 import time
+import base64

 # Monkey-patch streamrip's Tidal client credentials BEFORE importing TidalClient
 import streamrip.client.tidal as _tidal_module  # type: ignore  # noqa: E402

-_tidal_module.CLIENT_ID = "fX2JxdmntZWK0ixT"
-_tidal_module.CLIENT_SECRET = "1Nn9AfDAjxrgJFJbKNWLeAyKGVGmINuXPPLHVXAvxAg="
+# The Tidal client credentials are kept base64-encoded in source and are
+# decoded once, at import time.
+CLIENT_ID = base64.b64decode("ZlgySnhkbW50WldLMGl4VA==").decode("iso-8859-1")
+CLIENT_SECRET = base64.b64decode(
+    "MU5tNUFmREFqeHJnSkZKYktOV0xlQXlLR1ZHbUlOdVhQUExIVlhBdnhBZz0=",
+).decode("iso-8859-1")
+
+# Override streamrip's module-level credentials with the decoded values.
+_tidal_module.CLIENT_ID = CLIENT_ID
+
+_tidal_module.CLIENT_SECRET = CLIENT_SECRET
 _tidal_module.AUTH = aiohttp.BasicAuth(
    login=_tidal_module.CLIENT_ID, password=_tidal_module.CLIENT_SECRET
 )
@@ -306,14 +313,21 @@ class SRUtil:
        if not token_expiry:
            return True  # No expiry info means we should refresh
        try:
-            # token_expiry is typically an ISO timestamp string
-            if isinstance(token_expiry, str):
-                from datetime import datetime
-
-                expiry_dt = datetime.fromisoformat(token_expiry.replace("Z", "+00:00"))
-                expiry_ts = expiry_dt.timestamp()
-            else:
+            # token_expiry can be a Unix timestamp (float/int/string) or ISO string
+            if not isinstance(token_expiry, str):
                expiry_ts = float(token_expiry)
+            else:
+                # Try parsing as a numeric Unix timestamp first
+                try:
+                    expiry_ts = float(token_expiry)
+                except ValueError:
+                    # Fall back to ISO format string
+                    from datetime import datetime
+
+                    expiry_dt = datetime.fromisoformat(
+                        token_expiry.replace("Z", "+00:00")
+                    )
+                    expiry_ts = expiry_dt.timestamp()
            return expiry_ts < (time.time() + TIDAL_TOKEN_REFRESH_BUFFER)
        except Exception as e:
            logging.warning("Failed to parse token expiry '%s': %s", token_expiry, e)
@@ -1167,6 +1181,459 @@ class SRUtil:
            logging.critical("Error: %s", str(e))
            return False

+    # =========================================================================
+    # Video Support
+    # =========================================================================
+
+    async def search_videos(self, query: str, limit: int = 50) -> Optional[list[dict]]:
+        """Search for videos by query string.
+
+        The search call is retried with exponential backoff (1s, 2s, 4s) while
+        the error message contains "400" or "429". Any other error, or running
+        out of attempts, logs a warning and gives up.
+
+        Args:
+            query: Search query (artist name, song title, etc.)
+            limit: Maximum number of results to request.
+
+        Returns:
+            A list of dicts with the keys ``id``, ``title``, ``artist``,
+            ``duration``, ``duration_formatted``, ``release_date``,
+            ``image_id``, ``image_url`` and ``quality``, or None when the
+            search fails or yields no items.
+        """
+        max_retries = 4
+        delay = 1.0
+        results = None
+        for attempt in range(max_retries):
+            try:
+                results = await self._safe_api_call(
+                    self.streamrip_client.search,
+                    media_type="video",
+                    query=query,
+                    limit=limit,
+                    retries=3,
+                )
+                break
+            except Exception as e:
+                msg = str(e)
+                retryable = "400" in msg or "429" in msg
+                if not retryable or attempt >= max_retries - 1:
+                    logging.warning("Video search failed: %s", e)
+                    return None
+                await asyncio.sleep(delay)
+                delay *= 2
+
+        if not results:
+            return None
+
+        # Results can be paged - only the first page is used.
+        results_page = results[0] if isinstance(results, list) else results
+        items = results_page.get("items", []) if isinstance(results_page, dict) else []
+        if not items:
+            return None
+
+        videos_out = []
+        for item in items:
+            # The parentheses matter: without them the conditional applies to
+            # the whole `or` expression, so a top-level "artist" is thrown away
+            # whenever "artists" is missing or empty.
+            artist_info = item.get("artist") or (
+                item.get("artists", [{}])[0] if item.get("artists") else {}
+            )
+            artist_name = (
+                artist_info.get("name", "Unknown Artist")
+                if isinstance(artist_info, dict)
+                else str(artist_info)
+            )
+
+            image_id = item.get("imageId")
+            duration = item.get("duration")
+            videos_out.append({
+                "id": item.get("id"),
+                "title": item.get("title"),
+                "artist": artist_name,
+                "duration": duration,
+                "duration_formatted": self.format_duration(duration),
+                "release_date": item.get("releaseDate"),
+                "image_id": image_id,
+                "image_url": (
+                    f"https://resources.tidal.com/images/{image_id.replace('-', '/')}/640x360.jpg"
+                    if image_id
+                    else None
+                ),
+                "quality": item.get("quality"),
+            })
+
+        return videos_out
+
+    async def get_video_metadata(self, video_id: int) -> Optional[dict]:
+        """Get metadata for a specific video by ID.
+
+        Args:
+            video_id: The Tidal video ID.
+
+        Returns:
+            A dict with the keys ``id``, ``title``, ``artist``, ``artists``,
+            ``duration``, ``duration_formatted``, ``release_date``,
+            ``image_id``, ``image_url`` (1280x720), ``thumbnail_url``
+            (640x360), ``quality``, ``explicit``, ``album`` and ``album_id``,
+            or None if the lookup fails or returns nothing.
+        """
+        try:
+            metadata = await self._safe_api_call(
+                self.streamrip_client.get_metadata,
+                item_id=str(video_id),
+                media_type="video",
+                retries=3,
+            )
+        except Exception as e:
+            logging.warning("get_video_metadata failed for %s: %s", video_id, e)
+            return None
+
+        if not metadata:
+            return None
+
+        # "artists" may be absent or null; normalise once so both the primary
+        # artist lookup and the name list below tolerate either case.
+        artists = metadata.get("artists") or []
+        artist_info = metadata.get("artist") or (artists[0] if artists else {})
+        artist_name = (
+            artist_info.get("name", "Unknown Artist")
+            if isinstance(artist_info, dict)
+            else str(artist_info)
+        )
+
+        image_id = metadata.get("imageId")
+        image_base = (
+            f"https://resources.tidal.com/images/{image_id.replace('-', '/')}"
+            if image_id
+            else None
+        )
+        album = metadata.get("album") or {}
+        duration = metadata.get("duration")
+
+        return {
+            "id": metadata.get("id"),
+            "title": metadata.get("title"),
+            "artist": artist_name,
+            "artists": [a.get("name") for a in artists],
+            "duration": duration,
+            "duration_formatted": self.format_duration(duration),
+            "release_date": metadata.get("releaseDate"),
+            "image_id": image_id,
+            "image_url": f"{image_base}/1280x720.jpg" if image_base else None,
+            "thumbnail_url": f"{image_base}/640x360.jpg" if image_base else None,
+            "quality": metadata.get("quality"),
+            "explicit": metadata.get("explicit"),
+            "album": album.get("title"),
+            "album_id": album.get("id"),
+        }
+
+    async def get_video_stream_url(self, video_id: int) -> Optional[str]:
+        """Resolve the HLS stream URL (.m3u8) for a video.
+
+        Three strategies are tried in order:
+
+        1. streamrip's ``get_video_file_url``.
+        2. If that raises and the error text mentions an HLS manifest, the
+           master playlist URL is pulled out of the error message and its
+           best variant is selected.
+        3. A direct playback-info request via ``_get_video_manifest_url``.
+
+        Args:
+            video_id: The Tidal video ID.
+
+        Returns:
+            The stream URL, or None if none of the strategies produced one.
+        """
+        vid = str(video_id)
+        logging.info("VIDEO %s: Fetching stream URL...", video_id)
+        try:
+            # First try the standard streamrip method
+            logging.info("VIDEO %s: Trying streamrip get_video_file_url...", video_id)
+            url = await self._safe_api_call(
+                self.streamrip_client.get_video_file_url,
+                video_id=vid,
+                retries=2,
+            )
+        except Exception as primary_error:
+            # Keep a reference: the `as` name is unbound once this block ends.
+            failure = primary_error
+        else:
+            if not url:
+                return None
+            logging.info("VIDEO %s: Got stream URL via streamrip", video_id)
+            return url
+
+        # streamrip may fail when Tidal answers with the HLS manifest itself
+        # rather than JSON; in that case the manifest URL is in the error text.
+        reason = str(failure)
+        logging.info("VIDEO %s: streamrip method failed (%s), trying fallback...", video_id, reason[:100])
+        if "mpegurl" in reason.lower() or ".m3u8" in reason:
+            import re
+            found = re.search(r"(https://[^\s'\"]+\.m3u8[^\s'\"]*)", reason)
+            if found is not None:
+                master_url = found.group(1)
+                logging.info("VIDEO %s: Extracted HLS master URL from error", video_id)
+                # Prefer the highest quality variant, else the master itself
+                best_url = await self._get_best_variant_from_master(master_url)
+                return best_url if best_url else master_url
+
+        # Last resort: ask the Tidal API for the manifest directly
+        try:
+            logging.info("VIDEO %s: Trying direct API manifest fetch...", video_id)
+            direct_url = await self._get_video_manifest_url(vid)
+        except Exception as e2:
+            logging.warning("get_video_stream_url failed for %s: %s (fallback: %s)", video_id, failure, e2)
+            return None
+        if direct_url:
+            logging.info("VIDEO %s: Got stream URL via direct API", video_id)
+        return direct_url
+
+    async def _get_best_variant_from_master(self, master_url: str) -> Optional[str]:
+        """Parse an HLS master playlist and return the highest quality variant URL.
+
+        "Highest quality" means the variant with the largest ``BANDWIDTH``
+        attribute on its ``#EXT-X-STREAM-INF`` line. Variant URIs are resolved
+        against ``master_url``, so relative, root-relative and absolute URIs
+        all work, including when the master URL carries a query string.
+
+        Args:
+            master_url: URL of the HLS master playlist.
+
+        Returns:
+            The absolute URL of the best variant, or None when the playlist
+            cannot be fetched (non-200 or error) or lists no variants. Errors
+            are logged and never raised.
+        """
+        import re
+        from urllib.parse import urljoin
+
+        try:
+            # Ensure we have a session
+            if not getattr(self.streamrip_client, "session", None):
+                self.streamrip_client.session = await self.streamrip_client.get_session()
+
+            async with self.streamrip_client.session.get(master_url) as resp:
+                if resp.status != 200:
+                    return None
+                playlist_text = await resp.text()
+
+            # BANDWIDTH must directly follow ':' or ',' so that an earlier
+            # AVERAGE-BANDWIDTH attribute is not captured by mistake. The
+            # variant URI is the next line that is not a tag/comment.
+            stream_pattern = re.compile(
+                r'#EXT-X-STREAM-INF:(?:[^\n]*?,)?BANDWIDTH=(\d+)[^\n]*\n([^\n#]+)'
+            )
+            variants = stream_pattern.findall(playlist_text)
+            if not variants:
+                return None
+
+            # max() keeps the first entry on ties, like a stable descending sort.
+            bandwidth, variant_uri = max(variants, key=lambda v: int(v[0]))
+
+            # urljoin leaves absolute URLs untouched and resolves relative ones
+            # against the playlist location, ignoring the master's query string.
+            best_variant = urljoin(master_url, variant_uri.strip())
+
+            logging.info("Selected highest quality variant: bandwidth=%s", bandwidth)
+            return best_variant
+        except Exception as e:
+            logging.warning("Failed to parse HLS master playlist: %s", e)
+
+        return None
+
+    async def _get_video_manifest_url(self, video_id: str) -> Optional[str]:
+        """Directly fetch the HLS manifest URL from Tidal API.
+
+        This is a fallback when streamrip's method fails due to format changes.
+        The ``playbackinfopostpaywall`` response carries a base64-encoded JSON
+        manifest; its first entry under ``urls`` is taken as the master
+        playlist, which is then narrowed to its highest quality variant.
+
+        Args:
+            video_id: The Tidal video ID, as a string.
+
+        Returns:
+            The best variant URL, the master playlist URL if no variant could
+            be selected (ffmpeg will pick one), or None when the response has
+            no manifest or the manifest lists no URLs.
+
+        Raises:
+            Exception: Errors from the API request or from decoding the
+                manifest are not caught here; the caller handles them.
+        """
+        import base64
+
+        params = {
+            "videoquality": "HIGH",
+            "playbackmode": "STREAM",
+            "assetpresentation": "FULL",
+        }
+
+        # Ensure we have a session
+        if not getattr(self.streamrip_client, "session", None):
+            self.streamrip_client.session = await self.streamrip_client.get_session()
+
+        playback_info = await self.streamrip_client._api_request(
+            f"videos/{video_id}/playbackinfopostpaywall", params=params
+        )
+        if not playback_info or "manifest" not in playback_info:
+            return None
+
+        manifest_data = json.loads(
+            base64.b64decode(playback_info["manifest"]).decode("utf-8")
+        )
+        urls = manifest_data.get("urls", [])
+        if not urls:
+            return None
+
+        master_url = urls[0]
+
+        # Reuse the shared master-playlist parser rather than a second copy of
+        # it; it logs and returns None on any failure, so fall back to the
+        # master URL in that case.
+        best_variant = await self._get_best_variant_from_master(master_url)
+        return best_variant or master_url
+
+    async def download_video(self, video_id: int, output_path: Optional[str] = None) -> Optional[str]:
+        """Download a video by ID with ffmpeg and verify the result with ffprobe.
+
+        The stream URL comes from ``get_video_stream_url``. ffmpeg copies the
+        video stream, re-encodes the audio to 256k AAC and writes the output
+        file; ffprobe is then run on that file and it is deleted if ffprobe
+        rejects it. A partial file left by a failed ffmpeg run is deleted too.
+
+        Args:
+            video_id: The Tidal video ID.
+            output_path: Optional path to save the video. Can be a directory or full file path.
+                         If not provided, the file is written to a uniquely named
+                         subfolder of the configured downloads folder.
+
+        Returns:
+            The path to the downloaded video file, or None on failure. Errors
+            are logged and never raised.
+        """
+        try:
+            logging.info("VIDEO %s: Getting stream URL...", video_id)
+            video_url = await self.get_video_stream_url(video_id)
+            if not video_url:
+                logging.warning("No video URL for video ID: %s", video_id)
+                return None
+
+            logging.info("VIDEO %s: Got stream URL, preparing download...", video_id)
+
+            # Determine output path
+            if not output_path:
+                unique = hashlib.sha256(uuid4().bytes).hexdigest()[:16]
+                dl_folder_path = f"{self.streamrip_config.session.downloads.folder}/{unique}"
+                try:
+                    os.makedirs(dl_folder_path, exist_ok=True)
+                except Exception as e:
+                    # Still best effort (ffmpeg will fail and be reported
+                    # below), but leave a trace of the root cause.
+                    logging.warning("VIDEO %s: Could not create %s: %s", video_id, dl_folder_path, e)
+                output_path = f"{dl_folder_path}/{video_id}.mp4"
+            elif os.path.isdir(output_path):
+                # If output_path is a directory, append the video filename
+                output_path = os.path.join(output_path, f"{video_id}.mp4")
+
+            # Video URLs are HLS manifests - use ffmpeg to download
+            logging.info("VIDEO %s: Starting ffmpeg HLS download to %s", video_id, output_path)
+
+            cmd = [
+                "ffmpeg",
+                "-nostdin",  # Don't read from stdin - prevents SIGTTIN in background
+                "-hide_banner",
+                "-loglevel", "warning",
+                "-analyzeduration", "10M",
+                "-probesize", "10M",
+                "-i", video_url,
+                "-c:v", "copy",
+                "-c:a", "aac",
+                "-b:a", "256k",
+                "-af", "aresample=async=1:first_pts=0",
+                "-y",
+                output_path,
+            ]
+
+            proc = await asyncio.create_subprocess_exec(
+                *cmd,
+                stdin=asyncio.subprocess.DEVNULL,
+                stdout=asyncio.subprocess.DEVNULL,
+                stderr=asyncio.subprocess.PIPE,
+            )
+
+            # Use communicate() to avoid buffer deadlocks
+            _, stderr = await proc.communicate()
+
+            if proc.returncode != 0:
+                # errors="replace": ffmpeg output is not guaranteed to be valid
+                # UTF-8 and a decode error here would mask the real failure.
+                stderr_text = stderr.decode(errors="replace").strip() if stderr else "Unknown error"
+                logging.error("ffmpeg video download failed for %s: %s", video_id, stderr_text)
+                # Don't leave a truncated file behind for callers to trip over
+                try:
+                    os.remove(output_path)
+                except OSError:
+                    pass
+                return None
+
+            logging.info("VIDEO %s: ffmpeg completed, verifying file...", video_id)
+
+            if not os.path.exists(output_path) or os.path.getsize(output_path) == 0:
+                logging.error("Video download completed but file missing or empty")
+                return None
+
+            # Verify the MP4 is valid (has moov atom)
+            verify_cmd = [
+                "ffprobe",
+                "-v", "error",
+                "-show_entries", "format=duration",
+                "-of", "default=noprint_wrappers=1:nokey=1",
+                output_path,
+            ]
+            verify_proc = await asyncio.create_subprocess_exec(
+                *verify_cmd,
+                stdout=asyncio.subprocess.PIPE,
+                stderr=asyncio.subprocess.PIPE,
+            )
+            verify_stdout, verify_stderr = await verify_proc.communicate()
+
+            if verify_proc.returncode != 0:
+                stderr_text = verify_stderr.decode(errors="replace").strip() if verify_stderr else ""
+                logging.error("Downloaded video is corrupt (moov atom missing?): %s", stderr_text)
+                # Clean up corrupt file
+                try:
+                    os.remove(output_path)
+                except OSError:
+                    pass
+                return None
+
+            duration = verify_stdout.decode(errors="replace").strip() if verify_stdout else "unknown"
+            logging.info(
+                "Video %s downloaded to %s (%d bytes, duration: %ss)",
+                video_id, output_path, os.path.getsize(output_path), duration,
+            )
+            return output_path
+
+        except Exception as e:
+            logging.critical("Video download error for %s: %s", video_id, e)
+            return None
+
+    async def get_videos_by_artist_id(self, artist_id: int, limit: int = 50) -> Optional[list[dict]]:
+        """Get videos by artist ID.
+
+        Queries the ``artists/{id}/videos`` endpoint directly, starting at
+        offset 0; only that single page of results is read.
+
+        Args:
+            artist_id: The Tidal artist ID.
+            limit: Maximum number of videos to return.
+
+        Returns:
+            A list of dicts with the keys ``id``, ``title``, ``artist``,
+            ``duration``, ``duration_formatted``, ``release_date``,
+            ``image_id`` and ``image_url``, or None if the call fails or the
+            artist has no videos.
+        """
+        # Ensure we have a session
+        if not getattr(self.streamrip_client, "session", None):
+            self.streamrip_client.session = await self.streamrip_client.get_session()
+
+        try:
+            # Use the direct Tidal API endpoint for artist videos
+            resp = await self._safe_api_call(
+                self.streamrip_client._api_request,
+                f"artists/{artist_id}/videos",
+                params={"limit": limit, "offset": 0},
+                retries=3,
+            )
+        except Exception as e:
+            logging.warning("get_videos_by_artist_id API call failed: %s", e)
+            return None
+
+        if not resp:
+            return None
+
+        # The response has an "items" array
+        videos = resp.get("items", [])
+        if not videos:
+            return None
+
+        videos_out = []
+        for video in videos:
+            artist_info = video.get("artist") or (
+                video.get("artists", [{}])[0] if video.get("artists") else {}
+            )
+            artist_name = (
+                artist_info.get("name", "Unknown Artist")
+                if isinstance(artist_info, dict)
+                else str(artist_info)
+            )
+
+            image_id = video.get("imageId")
+            duration = video.get("duration")
+            videos_out.append({
+                "id": video.get("id"),
+                "title": video.get("title"),
+                "artist": artist_name,
+                "duration": duration,
+                "duration_formatted": self.format_duration(duration),
+                "release_date": video.get("releaseDate"),
+                "image_id": image_id,
+                "image_url": (
+                    f"https://resources.tidal.com/images/{image_id.replace('-', '/')}/640x360.jpg"
+                    if image_id
+                    else None
+                ),
+            })
+
+        return videos_out
+
    async def get_lrc_by_track_id(self, track_id: int) -> Optional[str]:
        """Get LRC lyrics by track ID."""
        logging.debug(f"SR: Fetching metadata for track ID {track_id}")