Add bulk video download functionality

- Implemented `bulk_video_download` function to handle video downloads, including metadata fetching, HLS stream handling, and tarball creation.
- Enhanced `bulk_download` function in `rip_background.py` to improve error logging with formatted track descriptions.
- Added video search and metadata retrieval methods in `sr_wrapper.py` for better integration with Tidal's video API.
- Updated Tidal client credentials.
This commit is contained in:
2026-02-18 13:38:26 -05:00
parent 9d16c96490
commit d6689b9c38
4 changed files with 1257 additions and 81 deletions

View File

@@ -12,12 +12,19 @@ import json
import os
import aiohttp
import time
import base64
# Monkey-patch streamrip's Tidal client credentials BEFORE importing TidalClient
import streamrip.client.tidal as _tidal_module # type: ignore # noqa: E402
# NOTE(review): the two plaintext assignments below are overwritten a few
# lines further down by the base64-decoded values, so they have no lasting
# effect. They look like the pre-change side of this diff — confirm and drop.
_tidal_module.CLIENT_ID = "fX2JxdmntZWK0ixT"
_tidal_module.CLIENT_SECRET = "1Nn9AfDAjxrgJFJbKNWLeAyKGVGmINuXPPLHVXAvxAg="
# NOTE(review): base64 is a reversible encoding, not protection; the
# credentials remain recoverable from source. Consider loading them from
# configuration or the environment instead.
CLIENT_ID = base64.b64decode("ZlgySnhkbW50WldLMGl4VA==").decode("iso-8859-1")
CLIENT_SECRET = base64.b64decode(
"MU5tNUFmREFqeHJnSkZKYktOV0xlQXlLR1ZHbUlOdVhQUExIVlhBdnhBZz0=",
).decode("iso-8859-1")
# Override the module-level credentials streamrip ships with.
_tidal_module.CLIENT_ID = CLIENT_ID
_tidal_module.CLIENT_SECRET = CLIENT_SECRET
# AUTH is rebuilt from the patched values so it matches the overridden
# CLIENT_ID / CLIENT_SECRET.
_tidal_module.AUTH = aiohttp.BasicAuth(
login=_tidal_module.CLIENT_ID, password=_tidal_module.CLIENT_SECRET
)
@@ -306,14 +313,21 @@ class SRUtil:
if not token_expiry:
return True # No expiry info means we should refresh
try:
# token_expiry is typically an ISO timestamp string
if isinstance(token_expiry, str):
from datetime import datetime
expiry_dt = datetime.fromisoformat(token_expiry.replace("Z", "+00:00"))
expiry_ts = expiry_dt.timestamp()
else:
# token_expiry can be a Unix timestamp (float/int/string) or ISO string
if not isinstance(token_expiry, str):
expiry_ts = float(token_expiry)
else:
# Try parsing as a numeric Unix timestamp first
try:
expiry_ts = float(token_expiry)
except ValueError:
# Fall back to ISO format string
from datetime import datetime
expiry_dt = datetime.fromisoformat(
token_expiry.replace("Z", "+00:00")
)
expiry_ts = expiry_dt.timestamp()
return expiry_ts < (time.time() + TIDAL_TOKEN_REFRESH_BUFFER)
except Exception as e:
logging.warning("Failed to parse token expiry '%s': %s", token_expiry, e)
@@ -1167,6 +1181,459 @@ class SRUtil:
logging.critical("Error: %s", str(e))
return False
# =========================================================================
# Video Support
# =========================================================================
async def search_videos(self, query: str, limit: int = 50) -> Optional[list[dict]]:
    """Search for videos by query string.

    The search is attempted up to four times. An error whose message
    contains "400" or "429" is retried after an exponentially growing delay
    (1s, 2s, 4s); any other error, or exhausting the attempts, ends the
    search with None.

    Args:
        query: Search query (artist name, song title, etc.)
        limit: Maximum number of results to request.

    Returns:
        A list of dicts with the keys ``id``, ``title``, ``artist``,
        ``duration``, ``duration_formatted``, ``release_date``,
        ``image_id``, ``image_url`` and ``quality``, or None when the
        search fails or yields no items.
    """
    max_retries = 4
    delay = 1.0
    results = None
    for attempt in range(max_retries):
        try:
            results = await self._safe_api_call(
                self.streamrip_client.search,
                media_type="video",
                query=query,
                limit=limit,
                retries=3,
            )
            break
        except Exception as e:
            msg = str(e)
            retryable = "400" in msg or "429" in msg
            if not retryable or attempt >= max_retries - 1:
                logging.warning("Video search failed: %s", e)
                return None
            await asyncio.sleep(delay)
            delay *= 2

    if not results:
        return None

    # Results can be paged - only the first page is used.
    results_page = results[0] if isinstance(results, list) else results
    items = results_page.get("items", []) if isinstance(results_page, dict) else []
    if not items:
        return None

    videos_out = []
    for item in items:
        # Prefer the single "artist" entry and fall back to the first of
        # "artists". The fallback is computed separately because a bare
        # `a or b if c else d` parses as `(a or b) if c else d`, which
        # discarded "artist" whenever "artists" was missing.
        artists = item.get("artists")
        artist_info = item.get("artist") or (artists[0] if artists else {})
        if isinstance(artist_info, dict):
            artist_name = artist_info.get("name", "Unknown Artist")
        else:
            artist_name = str(artist_info)

        image_id = item.get("imageId")
        image_url = (
            f"https://resources.tidal.com/images/{image_id.replace('-', '/')}/640x360.jpg"
            if image_id
            else None
        )
        duration = item.get("duration")
        videos_out.append({
            "id": item.get("id"),
            "title": item.get("title"),
            "artist": artist_name,
            "duration": duration,
            "duration_formatted": self.format_duration(duration),
            "release_date": item.get("releaseDate"),
            "image_id": image_id,
            "image_url": image_url,
            "quality": item.get("quality"),
        })
    return videos_out
async def get_video_metadata(self, video_id: int) -> Optional[dict]:
    """Get metadata for a specific video by ID.

    Args:
        video_id: The Tidal video ID.

    Returns:
        A dict with the keys ``id``, ``title``, ``artist``, ``artists``,
        ``duration``, ``duration_formatted``, ``release_date``,
        ``image_id``, ``image_url`` (1280x720), ``thumbnail_url``
        (640x360), ``quality``, ``explicit``, ``album`` and ``album_id``,
        or None when the lookup fails or returns nothing.
    """
    video_id_str = str(video_id)
    try:
        metadata = await self._safe_api_call(
            self.streamrip_client.get_metadata,
            item_id=video_id_str,
            media_type="video",
            retries=3,
        )
    except Exception as e:
        logging.warning("get_video_metadata failed for %s: %s", video_id, e)
        return None
    if not metadata:
        return None

    # "artists" may be absent or explicitly null; normalise both to an empty
    # list so the name listing below cannot fail on None.
    artists = metadata.get("artists") or []
    artist_info = metadata.get("artist") or (artists[0] if artists else {})
    if isinstance(artist_info, dict):
        artist_name = artist_info.get("name", "Unknown Artist")
    else:
        artist_name = str(artist_info)

    image_id = metadata.get("imageId")
    image_base = (
        f"https://resources.tidal.com/images/{image_id.replace('-', '/')}"
        if image_id
        else None
    )
    album = metadata.get("album") or {}
    duration = metadata.get("duration")

    return {
        "id": metadata.get("id"),
        "title": metadata.get("title"),
        "artist": artist_name,
        "artists": [a.get("name") for a in artists],
        "duration": duration,
        "duration_formatted": self.format_duration(duration),
        "release_date": metadata.get("releaseDate"),
        "image_id": image_id,
        "image_url": f"{image_base}/1280x720.jpg" if image_base else None,
        "thumbnail_url": f"{image_base}/640x360.jpg" if image_base else None,
        "quality": metadata.get("quality"),
        "explicit": metadata.get("explicit"),
        "album": album.get("title"),
        "album_id": album.get("id"),
    }
async def get_video_stream_url(self, video_id: int) -> Optional[str]:
    """Get a stream URL for a video, trying three sources in order.

    1. streamrip's ``get_video_file_url``.
    2. If that raises and the error text mentions an HLS playlist, the
       ``.m3u8`` URL embedded in the error message (narrowed to its best
       variant when the master playlist can be parsed).
    3. A direct playback-info request via ``_get_video_manifest_url``.

    Step 3 also runs when step 1 returns an empty result without raising;
    previously that case returned None without trying any fallback.

    Args:
        video_id: The Tidal video ID.

    Returns:
        A stream URL, or None if no source produced one.
    """
    import re

    video_id_str = str(video_id)
    logging.info("VIDEO %s: Fetching stream URL...", video_id)

    # Kept for the final warning so both failures are reported together.
    primary_failure: object = "streamrip returned no URL"
    try:
        # First try the standard streamrip method
        logging.info("VIDEO %s: Trying streamrip get_video_file_url...", video_id)
        url = await self._safe_api_call(
            self.streamrip_client.get_video_file_url,
            video_id=video_id_str,
            retries=2,
        )
        if url:
            logging.info("VIDEO %s: Got stream URL via streamrip", video_id)
            return url
        logging.info("VIDEO %s: streamrip returned no URL, trying fallback...", video_id)
    except Exception as e:
        # Streamrip's get_video_file_url may fail if Tidal returns HLS manifest
        # directly instead of a JSON with URLs. Try to get the manifest URL directly.
        primary_failure = e
        err_msg = str(e)
        logging.info("VIDEO %s: streamrip method failed (%s), trying fallback...", video_id, err_msg[:100])
        if "mpegurl" in err_msg.lower() or ".m3u8" in err_msg:
            # Extract the master manifest URL from the error message
            m3u8_match = re.search(r"(https://[^\s'\"]+\.m3u8[^\s'\"]*)", err_msg)
            if m3u8_match:
                master_url = m3u8_match.group(1)
                logging.info("VIDEO %s: Extracted HLS master URL from error", video_id)
                # Try to get the highest quality variant from the master playlist
                best_url = await self._get_best_variant_from_master(master_url)
                return best_url or master_url

    # Fall back to fetching the manifest URL directly from Tidal API
    try:
        logging.info("VIDEO %s: Trying direct API manifest fetch...", video_id)
        result = await self._get_video_manifest_url(video_id_str)
    except Exception as e2:
        logging.warning(
            "get_video_stream_url failed for %s: %s (fallback: %s)",
            video_id, primary_failure, e2,
        )
        return None
    if result:
        logging.info("VIDEO %s: Got stream URL via direct API", video_id)
        return result
    return None
async def _get_best_variant_from_master(self, master_url: str) -> Optional[str]:
"""Parse HLS master playlist and return the highest quality variant URL."""
import re
try:
# Ensure we have a session
if not hasattr(self.streamrip_client, 'session') or not self.streamrip_client.session:
self.streamrip_client.session = await self.streamrip_client.get_session()
async with self.streamrip_client.session.get(master_url) as resp:
if resp.status != 200:
return None
playlist_text = await resp.text()
# Parse HLS master playlist for variant streams
stream_pattern = re.compile(
r'#EXT-X-STREAM-INF:.*?BANDWIDTH=(\d+).*?\n([^\n#]+)',
re.MULTILINE
)
matches = stream_pattern.findall(playlist_text)
if matches:
# Sort by bandwidth (highest quality = highest bandwidth)
matches.sort(key=lambda x: int(x[0]), reverse=True)
best_variant = matches[0][1].strip()
# If it's a relative URL, make it absolute
if not best_variant.startswith('http'):
base_url = master_url.rsplit('/', 1)[0]
best_variant = f"{base_url}/{best_variant}"
logging.info("Selected highest quality variant: bandwidth=%s", matches[0][0])
return best_variant
except Exception as e:
logging.warning("Failed to parse HLS master playlist: %s", e)
return None
async def _get_video_manifest_url(self, video_id: str) -> Optional[str]:
"""Directly fetch the HLS manifest URL from Tidal API.
This is a fallback when streamrip's method fails due to format changes.
Returns the highest quality variant URL from the HLS master playlist.
"""
import base64
import re
params = {
"videoquality": "HIGH",
"playbackmode": "STREAM",
"assetpresentation": "FULL",
}
# Ensure we have a session
if not hasattr(self.streamrip_client, 'session') or not self.streamrip_client.session:
self.streamrip_client.session = await self.streamrip_client.get_session()
# Make the API request
resp = await self.streamrip_client._api_request(
f"videos/{video_id}/playbackinfopostpaywall", params=params
)
if not resp or "manifest" not in resp:
return None
# Decode the manifest
manifest_data = json.loads(base64.b64decode(resp["manifest"]).decode("utf-8"))
# The manifest should contain URLs - get the master playlist URL
urls = manifest_data.get("urls", [])
if not urls:
return None
master_url = urls[0]
# Try to fetch the master playlist and find the highest quality variant
try:
async with self.streamrip_client.session.get(master_url) as resp:
if resp.status == 200:
playlist_text = await resp.text()
# Parse HLS master playlist for variant streams
# Look for lines like: #EXT-X-STREAM-INF:BANDWIDTH=...,RESOLUTION=1920x1080
# followed by the variant URL
stream_pattern = re.compile(
r'#EXT-X-STREAM-INF:.*?BANDWIDTH=(\d+).*?\n([^\n#]+)',
re.MULTILINE
)
matches = stream_pattern.findall(playlist_text)
if matches:
# Sort by bandwidth (highest quality = highest bandwidth)
matches.sort(key=lambda x: int(x[0]), reverse=True)
best_variant = matches[0][1].strip()
# If it's a relative URL, make it absolute
if not best_variant.startswith('http'):
base_url = master_url.rsplit('/', 1)[0]
best_variant = f"{base_url}/{best_variant}"
logging.info("Selected highest quality video variant: bandwidth=%s", matches[0][0])
return best_variant
except Exception as e:
logging.warning("Failed to parse HLS master playlist: %s", e)
# Fall back to returning the master URL (ffmpeg will pick a variant)
return master_url
async def download_video(self, video_id: int, output_path: Optional[str] = None) -> Optional[str]:
    """Download a video by ID.

    The stream URL is resolved with ``get_video_stream_url`` and downloaded
    with ffmpeg (video stream copied, audio re-encoded to AAC). The result
    is then checked with ffprobe; a file ffprobe rejects is deleted.

    Args:
        video_id: The Tidal video ID.
        output_path: Optional path to save the video. Can be a directory or
            full file path. If not provided, a uniquely named folder under
            the configured downloads folder is used.

    Returns:
        The path to the downloaded video file, or None on failure.
    """

    def _discard(path: str) -> None:
        # Best-effort removal of a partial or corrupt output file.
        try:
            os.remove(path)
        except FileNotFoundError:
            pass
        except OSError as exc:
            logging.warning("VIDEO %s: could not remove %s: %s", video_id, path, exc)

    try:
        logging.info("VIDEO %s: Getting stream URL...", video_id)
        video_url = await self.get_video_stream_url(video_id)
        if not video_url:
            logging.warning("No video URL for video ID: %s", video_id)
            return None
        logging.info("VIDEO %s: Got stream URL, preparing download...", video_id)

        # Determine output path
        if not output_path:
            unique = hashlib.sha256(uuid4().bytes).hexdigest()[:16]
            dl_folder_path = f"{self.streamrip_config.session.downloads.folder}/{unique}"
            try:
                os.makedirs(dl_folder_path, exist_ok=True)
            except OSError as exc:
                # Still best-effort: ffmpeg reports the real failure if the
                # folder is unusable, but the cause is no longer hidden.
                logging.warning(
                    "VIDEO %s: could not create %s: %s", video_id, dl_folder_path, exc
                )
            output_path = f"{dl_folder_path}/{video_id}.mp4"
        elif os.path.isdir(output_path):
            # If output_path is a directory, append the video filename
            output_path = os.path.join(output_path, f"{video_id}.mp4")

        # Remember whether the target already existed so that a failed run
        # only removes files this call created.
        existed_before = os.path.exists(output_path)

        # Video URLs are HLS manifests - use ffmpeg to download
        logging.info("VIDEO %s: Starting ffmpeg HLS download to %s", video_id, output_path)
        print(f"VIDEO {video_id}: Starting ffmpeg download...")
        cmd = [
            "ffmpeg",
            "-nostdin",  # Don't read from stdin - prevents SIGTTIN in background
            "-hide_banner",
            "-loglevel", "warning",
            "-analyzeduration", "10M",
            "-probesize", "10M",
            "-i", video_url,
            "-c:v", "copy",
            "-c:a", "aac",
            "-b:a", "256k",
            "-af", "aresample=async=1:first_pts=0",
            "-y",
            output_path,
        ]
        proc = await asyncio.create_subprocess_exec(
            *cmd,
            stdin=asyncio.subprocess.DEVNULL,
            stdout=asyncio.subprocess.DEVNULL,
            stderr=asyncio.subprocess.PIPE,
        )
        # Use communicate() to avoid buffer deadlocks
        _, stderr = await proc.communicate()
        if proc.returncode != 0:
            # errors="replace": ffmpeg output is not guaranteed to be UTF-8,
            # and a decode error here would mask the real failure.
            stderr_text = (
                stderr.decode(errors="replace").strip() if stderr else "Unknown error"
            )
            logging.error("ffmpeg video download failed for %s: %s", video_id, stderr_text)
            if not existed_before:
                _discard(output_path)
            return None

        print(f"VIDEO {video_id}: ffmpeg completed, verifying file...")
        if not os.path.exists(output_path) or os.path.getsize(output_path) == 0:
            logging.error("Video download completed but file missing or empty")
            _discard(output_path)
            return None

        # Verify the MP4 is valid (has moov atom)
        verify_cmd = [
            "ffprobe",
            "-v", "error",
            "-show_entries", "format=duration",
            "-of", "default=noprint_wrappers=1:nokey=1",
            output_path,
        ]
        verify_proc = await asyncio.create_subprocess_exec(
            *verify_cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        verify_stdout, verify_stderr = await verify_proc.communicate()
        if verify_proc.returncode != 0:
            stderr_text = (
                verify_stderr.decode(errors="replace").strip() if verify_stderr else ""
            )
            logging.error("Downloaded video is corrupt (moov atom missing?): %s", stderr_text)
            # Clean up corrupt file
            _discard(output_path)
            return None

        duration = (
            verify_stdout.decode(errors="replace").strip() if verify_stdout else "unknown"
        )
        logging.info(
            "Video %s downloaded to %s (%d bytes, duration: %ss)",
            video_id, output_path, os.path.getsize(output_path), duration,
        )
        return output_path
    except Exception as e:
        logging.critical("Video download error for %s: %s", video_id, e)
        return None
async def get_videos_by_artist_id(self, artist_id: int, limit: int = 50) -> Optional[list[dict]]:
    """Get videos by artist ID.

    Args:
        artist_id: The Tidal artist ID.
        limit: Maximum number of videos to request.

    Returns:
        A list of dicts with the keys ``id``, ``title``, ``artist``,
        ``duration``, ``duration_formatted``, ``release_date``,
        ``image_id`` and ``image_url``, or None when the request fails, the
        response is not a dict, or it contains no items.
    """
    artist_id_str = str(artist_id)
    # Ensure we have a session
    if not getattr(self.streamrip_client, "session", None):
        self.streamrip_client.session = await self.streamrip_client.get_session()

    try:
        # Use the direct Tidal API endpoint for artist videos
        resp = await self._safe_api_call(
            self.streamrip_client._api_request,
            f"artists/{artist_id_str}/videos",
            params={"limit": limit, "offset": 0},
            retries=3,
        )
    except Exception as e:
        logging.warning("get_videos_by_artist_id API call failed: %s", e)
        return None

    # The response has an "items" array; anything that is not a dict is
    # treated as "no videos" instead of failing on .get().
    if not isinstance(resp, dict):
        return None
    videos = resp.get("items") or []
    if not videos:
        return None

    videos_out = []
    for video in videos:
        artists = video.get("artists")
        artist_info = video.get("artist") or (artists[0] if artists else {})
        if isinstance(artist_info, dict):
            artist_name = artist_info.get("name", "Unknown Artist")
        else:
            artist_name = str(artist_info)

        image_id = video.get("imageId")
        image_url = (
            f"https://resources.tidal.com/images/{image_id.replace('-', '/')}/640x360.jpg"
            if image_id
            else None
        )
        duration = video.get("duration")
        videos_out.append({
            "id": video.get("id"),
            "title": video.get("title"),
            "artist": artist_name,
            "duration": duration,
            "duration_formatted": self.format_duration(duration),
            "release_date": video.get("releaseDate"),
            "image_id": image_id,
            "image_url": image_url,
        })
    return videos_out
async def get_lrc_by_track_id(self, track_id: int) -> Optional[str]:
"""Get LRC lyrics by track ID."""
logging.debug(f"SR: Fetching metadata for track ID {track_id}")