Add bulk video download functionality

- Implemented `bulk_video_download` function to handle video downloads, including metadata fetching, HLS stream handling, and tarball creation.
- Enhanced `bulk_download` function in `rip_background.py` to improve error logging with formatted track descriptions.
- Added video search and metadata retrieval methods in `sr_wrapper.py` for better integration with Tidal's video API.
- Updated Tidal client credentials.
This commit is contained in:
2026-02-18 13:38:26 -05:00
parent 9d16c96490
commit d6689b9c38
4 changed files with 1257 additions and 81 deletions

View File

@@ -320,6 +320,15 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
}
attempt = 0
def _track_desc() -> str:
    """Human-readable track description used in Discord/console log lines."""
    title = track_info.get("title") or f"Track {track_id}"
    artist = track_info.get("artist") or "Unknown"
    album = track_info.get("album") or ""
    base = f"{track_id} - '{title}' by {artist}"
    return f"{base} [{album}]" if album else base
# Fetch metadata FIRST to check if track is available before attempting download
md = None
try:
@@ -327,6 +336,12 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
md = await sr.get_metadata_by_track_id(track_id) or {}
print(f"DEBUG: Metadata fetched: {bool(md)}")
# Populate track_info immediately so failure logs have useful info
if md:
track_info["title"] = md.get("title") or f"Track {track_id}"
track_info["artist"] = md.get("artist") or "Unknown Artist"
track_info["album"] = md.get("album") or "Unknown Album"
# Check if track is streamable
if md and not md.get("streamable", True):
print(f"TRACK {track_id}: Not streamable, skipping")
@@ -564,7 +579,7 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
break
except aiohttp.ClientResponseError as e:
msg = f"Track {track_id} attempt {attempt} ClientResponseError: {e}"
msg = f"Track {_track_desc()} attempt {attempt} ClientResponseError: {e}"
send_log_to_discord(msg, "WARNING", target)
# If 429, backoff as before. If 5xx, recreate session and refresh Tidal client.
if getattr(e, "status", None) == 429:
@@ -581,7 +596,7 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
try:
await sr._force_fresh_login()
send_log_to_discord(
f"Refreshed Tidal session after 5xx error on track {track_id}",
f"Refreshed Tidal session after 5xx error on track {_track_desc()}",
"WARNING",
target,
)
@@ -625,7 +640,7 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
if is_not_found:
# Permanent failure - do not retry
msg = (
f"Track {track_id} not found/unavailable, skipping: {e}"
f"Track {_track_desc()} not found/unavailable, skipping: {e}"
)
print(msg)
send_log_to_discord(msg, "WARNING", target)
@@ -634,7 +649,7 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
break # Exit retry loop immediately
elif is_5xx_error:
msg = (
f"Track {track_id} attempt {attempt} server error: {e}"
f"Track {_track_desc()} attempt {attempt} server error: {e}"
)
send_log_to_discord(msg, "WARNING", target)
track_info["error"] = err_str
@@ -648,7 +663,7 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
try:
await sr._force_fresh_login()
send_log_to_discord(
f"Refreshed Tidal session after 5xx error on track {track_id}",
f"Refreshed Tidal session after 5xx error on track {_track_desc()}",
"WARNING",
target,
)
@@ -661,7 +676,7 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
if attempt >= MAX_RETRIES:
track_info["status"] = "Failed"
send_log_to_discord(
f"Track {track_id} failed after {attempt} attempts (5xx)",
f"Track {_track_desc()} failed after {attempt} attempts (5xx)",
"ERROR",
target,
)
@@ -670,13 +685,13 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
)
elif is_no_stream_url:
if attempt == 1 or attempt == MAX_RETRIES:
msg = f"Track {track_id} attempt {attempt} failed: {e}\n{tb}"
msg = f"Track {_track_desc()} attempt {attempt} failed: {e}\n{tb}"
send_log_to_discord(msg, "ERROR", target)
track_info["error"] = str(e)
if attempt >= MAX_RETRIES:
track_info["status"] = "Failed"
send_log_to_discord(
f"Track {track_id} failed after {attempt} attempts",
f"Track {_track_desc()} failed after {attempt} attempts",
"ERROR",
target,
)
@@ -685,14 +700,14 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
)
else:
msg = (
f"Track {track_id} attempt {attempt} failed: {e}\n{tb}"
f"Track {_track_desc()} attempt {attempt} failed: {e}\n{tb}"
)
send_log_to_discord(msg, "ERROR", target)
track_info["error"] = str(e)
if attempt >= MAX_RETRIES:
track_info["status"] = "Failed"
send_log_to_discord(
f"Track {track_id} failed after {attempt} attempts",
f"Track {_track_desc()} failed after {attempt} attempts",
"ERROR",
target,
)
@@ -885,11 +900,367 @@ def bulk_download(track_list: list, quality: str = "FLAC"):
loop.close()
# Correct integration of FLAC stream check
async def process_tracks(track_list):
    """Verify each track's combined segment contains a FLAC stream before decoding.

    Tracks whose combined file has no FLAC stream are logged and skipped.
    Fix: the original ``enumerate`` bound ``i`` and ``track_id`` but never
    used either; iterate without the dead bindings.
    """
    for _ in track_list or []:
        # NOTE(review): example/staging path — presumably replaced by the real
        # merged-segment path from the download pipeline; confirm upstream.
        combined_path = f"/tmp/{uuid.uuid4().hex}_combined.m4s"
        if not await check_flac_stream(combined_path):
            logger.error(f"No FLAC stream found in {combined_path}. Skipping file.")
            continue
        # Proceed with decoding pipeline
# ---------- bulk_video_download ----------
def bulk_video_download(video_list: list):
    """
    RQ job for bulk video downloads:
      - fetches video metadata and HLS streams
      - downloads with ffmpeg in highest quality
      - creates ONE tarball for all videos
      - returns [tarball_path]
      - sends relevant messages to Discord

    Fixes over the initial implementation:
      - tar/pigz fallback also catches CalledProcessError (tar exits non-zero
        when pigz is missing; FileNotFoundError only fires when tar itself is
        absent), so the tarfile fallback actually runs.
      - collision-renamed tarballs no longer embed a stray ".tar" (Path.stem
        of "X.tar.gz" is "X.tar").
      - per-attempt temp dirs under /tmp are cleaned up on failure, not only
        on success.
    """
    job = get_current_job()
    job_id = job.id if job else uuid.uuid4().hex
    target = job.meta.get("target") if job else None
    staging_root = ROOT_DIR / f"video_{job_id}"

    if job:
        try:
            job.meta["video_ids"] = [str(v) for v in (video_list or [])]
            job.meta["videos"] = []
            job.meta["progress"] = 0
            job.meta["tarball"] = None
            job.meta["status"] = "Started"
            job.save_meta()
        except Exception as e:
            send_log_to_discord(f"Failed to init job.meta: {e}", "WARNING", target)

    # Job started Discord message
    asyncio.run(
        discord_notify(
            DISCORD_WEBHOOK,
            title=f"Video Job Started: {job_id}",
            description=f"Processing `{len(video_list)}` video(s)",
            target=target,
            color=0x00FFFF,
        )
    )

    async def process_videos(video_list):
        """Download every video, then tar the results; returns [tarball_path] or []."""
        per_video_meta = []
        all_final_files = []
        all_artists = set()
        (ROOT_DIR / "completed").mkdir(parents=True, exist_ok=True)
        total = len(video_list or [])

        for i, video_id in enumerate(video_list or []):
            print(f"DEBUG: Processing video {i + 1}/{total}: {video_id}")
            video_info = {
                "video_id": str(video_id),
                "title": None,
                "artist": None,
                "status": "Pending",
                "file_path": None,
                "filename": None,
                "error": None,
                "attempts": 0,
            }
            attempt = 0

            def _video_desc() -> str:
                """Format video info for log messages."""
                title = video_info.get("title") or f"Video {video_id}"
                artist = video_info.get("artist") or "Unknown"
                return f"{video_id} - '{title}' by {artist}"

            # Fetch metadata first so failure logs carry title/artist.
            md = None
            try:
                print(f"DEBUG: Fetching metadata for video {video_id}")
                md = await sr.get_video_metadata(video_id)
                print(f"DEBUG: Metadata fetched: {bool(md)}")
                if md:
                    video_info["title"] = md.get("title") or f"Video {video_id}"
                    video_info["artist"] = md.get("artist") or "Unknown Artist"
            except Exception as meta_err:
                print(f"VIDEO {video_id}: Metadata fetch failed: {meta_err}")
                md = None

            while attempt < MAX_RETRIES:
                attempt += 1
                video_info["attempts"] = attempt
                tmp_dir = None  # set before try so the except can clean it up
                try:
                    # sr.download_video handles HLS and quality selection.
                    print(f"VIDEO {video_id}: Starting download (attempt {attempt})")
                    tmp_dir = Path(f"/tmp/video_{uuid.uuid4().hex}")
                    tmp_dir.mkdir(parents=True, exist_ok=True)
                    # Cap each video download at 30 minutes.
                    try:
                        file_path = await asyncio.wait_for(
                            sr.download_video(video_id, str(tmp_dir)),
                            timeout=1800,  # 30 minutes
                        )
                    except asyncio.TimeoutError:
                        print(f"VIDEO {video_id}: Download timed out after 30 minutes")
                        raise RuntimeError("Download timed out after 30 minutes")
                    if not file_path or not Path(file_path).exists():
                        raise RuntimeError("Download completed but no file created")

                    # If we didn't get metadata earlier, try again.
                    if not md:
                        try:
                            md = await sr.get_video_metadata(video_id)
                        except Exception:
                            md = {}
                    md = md or {}
                    artist_raw = md.get("artist") or "Unknown Artist"
                    title_raw = md.get("title") or f"Video {video_id}"
                    artist = sanitize_filename(artist_raw)
                    title = sanitize_filename(title_raw)
                    video_info["title"] = title
                    video_info["artist"] = artist
                    print(f"VIDEO {video_id}: Processing '{title}' by {artist}")
                    all_artists.add(artist)
                    video_dir = staging_root / artist
                    video_dir.mkdir(parents=True, exist_ok=True)
                    final_file = ensure_unique_path(video_dir / f"{title}.mp4")

                    # Move to final location, then drop the temp dir.
                    print(f"VIDEO {video_id}: Moving to final location...")
                    shutil.move(str(file_path), str(final_file))
                    shutil.rmtree(tmp_dir, ignore_errors=True)
                    print(f"VIDEO {video_id}: File moved successfully")

                    # Success bookkeeping.
                    video_info["status"] = "Success"
                    video_info["file_path"] = str(final_file)
                    try:
                        video_info["filename"] = final_file.name
                    except Exception:
                        video_info["filename"] = None
                    video_info["error"] = None
                    all_final_files.append(final_file)
                    print(
                        f"VIDEO {video_id}: SUCCESS! Progress: {((i + 1) / total) * 100:.0f}%"
                    )
                    # Throttle between videos to avoid hammering the API.
                    await asyncio.sleep(random.uniform(THROTTLE_MIN, THROTTLE_MAX))
                    if job:
                        job.meta["progress"] = int(((i + 1) / total) * 100)
                        job.meta["videos"] = per_video_meta + [video_info]
                        job.save_meta()
                    break
                except Exception as e:
                    tb = traceback.format_exc()
                    err_str = str(e).lower()
                    # Don't leak the per-attempt staging dir on failure.
                    if tmp_dir is not None:
                        shutil.rmtree(tmp_dir, ignore_errors=True)
                    is_not_found = any(
                        phrase in err_str
                        for phrase in (
                            "video not found",
                            "not found",
                            "404",
                            "does not exist",
                            "no longer available",
                        )
                    )
                    if is_not_found:
                        # Permanent failure - do not retry.
                        msg = f"Video {_video_desc()} not found/unavailable, skipping: {e}"
                        print(msg)
                        send_log_to_discord(msg, "WARNING", target)
                        video_info["status"] = "Failed"
                        video_info["error"] = str(e)
                        break
                    else:
                        msg = f"Video {_video_desc()} attempt {attempt} failed: {e}\n{tb}"
                        send_log_to_discord(msg, "ERROR", target)
                        video_info["error"] = str(e)
                        if attempt >= MAX_RETRIES:
                            video_info["status"] = "Failed"
                            send_log_to_discord(
                                f"Video {_video_desc()} failed after {attempt} attempts",
                                "ERROR",
                                target,
                            )
                        await asyncio.sleep(random.uniform(THROTTLE_MIN, THROTTLE_MAX))

            # Ensure placeholders for job metadata
            video_info["title"] = video_info.get("title") or f"Video {video_id}"
            video_info["artist"] = video_info.get("artist") or "Unknown Artist"
            if video_info.get("file_path") and not video_info.get("filename"):
                try:
                    video_info["filename"] = Path(video_info["file_path"]).name
                except Exception:
                    video_info["filename"] = None
            per_video_meta.append(video_info)

        if not all_final_files:
            if job:
                job.meta["tarball"] = None
                job.meta["status"] = "Failed"
                job.save_meta()
            send_log_to_discord(
                f"No videos were successfully downloaded for job `{job_id}`",
                "CRITICAL",
                target,
            )
            return []

        # Tarball creation: name after the job target if present, else the
        # artist with the most successful videos.
        artist_counts = {}
        for v in per_video_meta:
            if v["status"] == "Success" and v.get("file_path"):
                try:
                    artist = Path(v["file_path"]).relative_to(staging_root).parts[0]
                except Exception:
                    artist = "Unknown Artist"
                artist_counts[artist] = artist_counts.get(artist, 0) + 1
        top_artist = (
            sorted(artist_counts.items(), key=lambda kv: (-kv[1], kv[0]))[0][0]
            if artist_counts
            else "Unknown Artist"
        )
        target_name = None
        try:
            if job and job.meta:
                target_name = job.meta.get("target")
        except Exception:
            target_name = None
        base_label = (
            sanitize_filename(target_name)
            if target_name
            else sanitize_filename(top_artist)
        )
        staged_tarball = staging_root / f"{base_label}_videos.tar.gz"
        counter = 1
        # .stem on "X.tar.gz" is "X.tar"; strip the full double suffix so
        # renamed duplicates read "X_videos (2).tar.gz".
        base_name = staged_tarball.name[: -len(".tar.gz")]
        while staged_tarball.exists():
            counter += 1
            staged_tarball = staging_root / f"{base_name} ({counter}).tar.gz"
        final_dir = Path("/storage/music/TRIP/videos")
        final_dir.mkdir(parents=True, exist_ok=True)
        final_tarball = ensure_unique_filename_in_dir(final_dir, staged_tarball.name)

        if job:
            job.meta["status"] = "Compressing"
            job.save_meta()
        logging.info("Creating video tarball: %s", staged_tarball)
        await discord_notify(
            DISCORD_WEBHOOK,
            title=f"Compressing: Video Job {job_id}",
            description=f"Creating tarball: `{len(all_final_files)}` video(s).\nStaging path: {staged_tarball}",
            color=0xFFA500,
            target=target,
        )
        try:
            subprocess.run(
                [
                    "tar",
                    "-I",
                    "pigz -9",
                    "-cf",
                    str(staged_tarball),
                    "-C",
                    str(staging_root),
                ]
                + [str(f.relative_to(staging_root)) for f in all_final_files],
                check=True,
            )
            for f in all_final_files:
                try:
                    os.remove(f)
                except Exception:
                    pass
        except (FileNotFoundError, subprocess.CalledProcessError):
            # FileNotFoundError: tar binary missing. CalledProcessError: tar
            # ran but failed (typically "pigz: command not found"). Either
            # way, fall back to pure-Python tarfile.
            send_log_to_discord(
                "pigz not available, falling back to tarfile (slower).",
                "WARNING",
                target,
            )
            with tarfile.open(staged_tarball, "w:gz") as tar:
                for f in all_final_files:
                    try:
                        arcname = f.relative_to(staging_root)
                    except ValueError:
                        arcname = f.name
                    tar.add(f, arcname=str(arcname))
                    try:
                        os.remove(f)
                    except Exception:
                        pass
        except Exception as e:
            send_log_to_discord(f"Video tar creation failed: {e}", "ERROR", target)
            if job:
                job.meta["status"] = "compress_failed"
                job.save_meta()
            return []

        if not staged_tarball.exists():
            send_log_to_discord(
                f"Video tarball was not created: `{staged_tarball}`", "CRITICAL", target
            )
            if job:
                job.meta["status"] = "compress_failed"
                job.save_meta()
            return []

        try:
            staged_tarball.rename(final_tarball)
        except Exception:
            # rename fails across filesystems; shutil.move copies instead.
            shutil.move(str(staged_tarball), str(final_tarball))
        await asyncio.to_thread(shutil.rmtree, staging_root, ignore_errors=True)

        if job:
            job.meta["tarball"] = str(final_tarball)
            job.meta["progress"] = 100
            job.meta["status"] = "Completed"
            job.save_meta()

        # Job completed Discord message
        completed = len(all_final_files)
        failed = len(video_list) - completed
        await discord_notify(
            DISCORD_WEBHOOK,
            title=f"Video Job Completed: {job_id}",
            description=f"Processed `{len(video_list)}` video(s).\nCompleted: `{completed}`\nFailed: `{failed}`\nTarball: `{final_tarball}`",
            target=target,
            color=0x00FF00,
        )
        logging.info("Video job %s finished, tarball: %s", job_id, final_tarball)
        return [str(final_tarball)]

    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        return loop.run_until_complete(process_videos(video_list))
    except Exception as e:
        send_log_to_discord(
            f"bulk_video_download failed: {e}\n{traceback.format_exc()}", "CRITICAL", target
        )
        if job:
            job.meta["status"] = "Failed"
            job.save_meta()
    finally:
        loop.close()