formatting / RQ tuning

2025-08-15 13:39:27 -04:00
parent 93050ec6cf
commit 0cd4a71db2
2 changed files with 18 additions and 11 deletions

View File

@@ -11,6 +11,7 @@ from utils.rip_background import bulk_download
 from lyric_search.sources import private
 from pydantic import BaseModel
 class ValidBulkFetchRequest(BaseModel):
     track_ids: list[int]
@@ -119,7 +120,13 @@ class RIP(FastAPI):
             }
         )
         track_ids = data.track_ids
-        job = self.task_queue.enqueue(bulk_download, track_ids)
+        job = self.task_queue.enqueue(
+            bulk_download,
+            args=(track_ids,),
+            timeout=3600,
+            failure_ttl=86400,
+            result_ttl=86400,
+        )
         self.redis_conn.lpush("enqueued_job_ids", job.id)
         return JSONResponse(
             content={
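
The new keyword arguments map onto RQ's job lifecycle: the timeout bounds how long a worker may run the job before it is killed, result_ttl keeps the finished job (and its meta) readable for 24 hours, and failure_ttl keeps failed jobs around for the same period. A minimal standalone sketch of an equivalent enqueue, assuming a local Redis and an importable bulk_download; the queue name here is illustrative, and newer RQ releases spell the timeout keyword job_timeout:

# Minimal sketch (assumptions: local Redis, bulk_download importable by the worker).
from redis import Redis
from rq import Queue

from utils.rip_background import bulk_download

redis_conn = Redis()
task_queue = Queue("rip", connection=redis_conn)  # queue name is an assumption

job = task_queue.enqueue(
    bulk_download,
    args=([12345, 67890],),  # one positional arg: the list of track IDs
    job_timeout=3600,        # max runtime before the worker kills the job
    result_ttl=86400,        # keep the finished job/meta for 24h
    failure_ttl=86400,       # keep failed jobs for 24h for inspection
)
print(job.id, job.get_status())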
@@ -150,7 +157,7 @@ class RIP(FastAPI):
"""List all jobs in the queue (queued + finished, if result_ttl allows)""" """List all jobs in the queue (queued + finished, if result_ttl allows)"""
jobs_info = [] jobs_info = []
# 1 Jobs still in the queue (pending) # Jobs still in the queue (pending)
for job in self.task_queue.jobs: for job in self.task_queue.jobs:
jobs_info.append( jobs_info.append(
{ {
@@ -162,7 +169,7 @@ class RIP(FastAPI):
                 }
             )
-        # 2 Started/running jobs tracked via enqueued_job_ids
+        # Started/running jobs tracked via enqueued_job_ids
         job_ids = self.redis_conn.lrange("enqueued_job_ids", 0, -1)
         for jid_bytes in job_ids:  # type: ignore
             jid = jid_bytes.decode()
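
The IDs pushed onto enqueued_job_ids can later be rehydrated into RQ jobs to report started/running state and progress. A rough sketch of that lookup, assuming the same redis_conn; jobs older than their result_ttl/failure_ttl no longer resolve:

# Sketch: resolve IDs from the enqueued_job_ids list back into RQ jobs.
from rq.job import Job
from rq.exceptions import NoSuchJobError

for jid_bytes in redis_conn.lrange("enqueued_job_ids", 0, -1):
    jid = jid_bytes.decode()
    try:
        job = Job.fetch(jid, connection=redis_conn)
    except NoSuchJobError:
        continue  # expired past its TTLs
    print(jid, job.get_status(), job.meta.get("progress"))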

View File

@@ -73,7 +73,7 @@ def bulk_download(track_list: list):
             while attempt < MAX_RETRIES:
                 attempt += 1
                 try:
-                    # 1 Get track URL
+                    # Get track URL
                     url = await sr.get_stream_url_by_track_id(track_id)
                     if not url:
                         logging.critical(
@@ -84,7 +84,7 @@ def bulk_download(track_list: list):
                         )
                         continue
-                    # 2 Download file (chunked)
+                    # Download file (chunked)
                     parsed = urlparse(url)
                     ext = Path(unquote(parsed.path)).suffix or ".mp3"
                     tmp_file = Path(f"/tmp/{track_id}{ext}")
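
The download step (next hunk) streams the response body to disk in 64 KiB chunks rather than buffering it in memory. A self-contained sketch of the same aiohttp pattern; the URL and destination path are placeholders:

# Sketch: stream a file to disk in 64 KiB chunks with aiohttp (URL is a placeholder).
import asyncio
from pathlib import Path

import aiohttp

async def download(url: str, dest: Path) -> None:
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as resp:
            resp.raise_for_status()
            with dest.open("wb") as f:
                async for chunk in resp.content.iter_chunked(64 * 1024):
                    f.write(chunk)

asyncio.run(download("https://example.com/track.mp3", Path("/tmp/track.mp3")))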
@@ -95,7 +95,7 @@ def bulk_download(track_list: list):
                             async for chunk in resp.content.iter_chunked(64 * 1024):
                                 f.write(chunk)
-                    # 3 Extract metadata
+                    # Extract metadata
                     metadata = await sr.get_metadata_by_track_id(track_id)
                     if not metadata:
                         logging.critical(
@@ -109,13 +109,13 @@ def bulk_download(track_list: list):
                     logging.critical("Got metadata: %s/%s/%s", artist, album, title)
-                    # 4 Organize path
+                    # Organize path
                     final_dir = ROOT_DIR / artist / album
                     final_dir.mkdir(parents=True, exist_ok=True)
                     final_file = final_dir / f"{title}{ext}"
                     tmp_file.rename(final_file)
-                    # 5 Track per-track info
+                    # Track per-track info
                     track_info.update(
                         {"status": "success", "file_path": str(final_file)}
                     )
@@ -134,17 +134,17 @@ def bulk_download(track_list: list):
                         random.uniform(THROTTLE_MIN, THROTTLE_MAX)
                     )
-            # 6 Update RQ job meta
+            # Update RQ job meta
             per_track_meta.append(track_info)
             if job:
                 job.meta["progress"] = int((i + 1) / total * 100)
                 job.meta["tracks"] = per_track_meta
                 job.save_meta()
-            # 7 Throttle between downloads
+            # Throttle between downloads
             await asyncio.sleep(random.uniform(THROTTLE_MIN, THROTTLE_MAX))
-        # 8 Create per-artist tarballs
+        # Create per-artist tarballs
         tarballs = []
         for artist, files in artist_files.items():
             short_id = uuid.uuid4().hex[:8]
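
The tarball step is only partially visible in this hunk. A hedged sketch of what per-artist bundling with the standard library can look like, assuming artist_files maps artist name to a list of Paths; the output naming scheme is illustrative, not necessarily the repo's:

# Sketch: bundle each artist's files into a tarball (names/paths are illustrative).
import tarfile
import uuid
from pathlib import Path

def make_tarballs(artist_files: dict[str, list[Path]], out_dir: Path) -> list[Path]:
    tarballs = []
    for artist, files in artist_files.items():
        short_id = uuid.uuid4().hex[:8]
        tar_path = out_dir / f"{artist}_{short_id}.tar.gz"
        with tarfile.open(tar_path, "w:gz") as tar:
            for f in files:
                tar.add(f, arcname=f.name)  # store only the file name inside the archive
        tarballs.append(tar_path)
    return tarballs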