2025-08-20 15:58:07 -04:00
parent 81f79dea1e
commit e0f64f6773
2 changed files with 93 additions and 65 deletions

View File

@@ -7,6 +7,7 @@ from auth.deps import get_current_user
from redis import Redis
from rq import Queue
from rq.job import Job
from rq.job import JobStatus
from utils.rip_background import bulk_download
from lyric_search.sources import private
from pydantic import BaseModel
@@ -125,16 +126,24 @@ class RIP(FastAPI):
target = data.target
job = self.task_queue.enqueue(
bulk_download,
args=(track_ids, target),
args=(track_ids,),
job_timeout=14400,
failure_ttl=86400,
result_ttl=86400,
meta={
'progress': 0,
'status': 'queued',
'target': target,
'tracks_in': len(track_ids),
}
)
self.redis_conn.lpush("enqueued_job_ids", job.id)
return JSONResponse(
content={
"job_id": job.id,
"status": "queued",
"target": job.meta.get("target", None)
}
)
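
Note: a minimal standalone sketch of the enqueue pattern this hunk settles on — the job callable now receives only the track list, while everything the API reads back later (progress, status, target, expected track count) rides in job.meta, and the job id is pushed onto a plain Redis list for the list handler to scan. The Redis connection settings and queue name below are assumptions, not taken from this repo.

from redis import Redis
from rq import Queue
from utils.rip_background import bulk_download  # import path as shown in the hunk above

redis_conn = Redis()                      # assumed connection settings
task_queue = Queue(connection=redis_conn) # assumed default queue name

track_ids = ["101", "102", "103"]         # illustrative ids
job = task_queue.enqueue(
    bulk_download,
    args=(track_ids,),
    job_timeout=14400,
    failure_ttl=86400,
    result_ttl=86400,
    meta={
        "progress": 0,
        "status": "queued",
        "target": "example-target",       # placeholder value
        "tracks_in": len(track_ids),
    },
)
redis_conn.lpush("enqueued_job_ids", job.id)  # registry scanned by job_list_handler
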
@@ -146,16 +155,25 @@ class RIP(FastAPI):
job = Job.fetch(job_id, connection=self.redis_conn)
except Exception:
return JSONResponse({"error": "Job not found"}, status_code=404)
job_status: str|JobStatus = job.get_status()
job_progress = job.meta.get("progress", 0)
job_tarball = job.meta.get("tarball")
if job_progress == 100 and not job_tarball:
job_status = "compressing"
tracks_out = len(job.meta.get("tracks", []))
tracks_in = job.meta.get("tracks_in", None)
return {
"id": job.id,
"status": job.get_status(),
"status": job_status,
"result": job.result,
"enqueued_at": job.enqueued_at,
"started_at": job.started_at,
"ended_at": job.ended_at,
"progress": job.meta.get("progress"),
"progress": job_progress,
"target": job.meta.get("target"),
"tracks": f"{tracks_out} / {tracks_in}" if isinstance(tracks_in, int) else tracks_out,
"tarball": job_tarball,
}
async def job_list_handler(self, request: Request, user=Depends(get_current_user)):
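
Note: the derived "compressing" state (progress at 100 but no tarball recorded yet) is now computed in both job_status_handler and job_list_handler. A hypothetical helper, not part of this commit, that captures the rule in one place:

from rq.job import Job, JobStatus

def derive_status(job: Job) -> str | JobStatus:
    """Report 'compressing' for jobs that finished downloading but have no tarball yet."""
    status: str | JobStatus = job.get_status()
    if job.meta.get("progress", 0) == 100 and not job.meta.get("tarball"):
        return "compressing"
    return status
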
@@ -164,18 +182,23 @@ class RIP(FastAPI):
# Jobs still in the queue (pending)
for job in self.task_queue.jobs:
status: str|JobStatus = job.get_status()
job_progress = job.meta.get("progress", 0)
tarball = job.meta.get("tarball")
if job_progress == 100 and not tarball:
status = "compressing"
jobs_info.append(
{
"id": job.id,
"status": job.get_status(), # queued
"status": status, # queued
"result": job.result,
"enqueued_at": job.enqueued_at,
"progress": job.meta.get("progress", None),
"progress": job.meta.get("progress", 0),
"target": job.meta.get("target", None)
}
)
# Started/running jobs tracked via enqueued_job_ids
# Running jobs tracked via enqueued_job_ids
job_ids = self.redis_conn.lrange("enqueued_job_ids", 0, -1)
for jid_bytes in job_ids: # type: ignore
jid = jid_bytes.decode()
@@ -184,19 +207,24 @@ class RIP(FastAPI):
except Exception:
continue # job may have completed and expired
status = job.get_status()
if status in ("started", "queued", "finished"):
job_status: str|JobStatus = job.get_status()
job_progress = job.meta.get("progress", 0)
if job_progress == 100 and not job.meta.get("tarball"):
job_status = "compressing"
if job_status in ("started", "queued", "finished", "failed", "compressing"):
# avoid duplicates for jobs already in task_queue.jobs
if not any(j["id"] == job.id for j in jobs_info):
tracks_in = job.meta.get("tracks_in", None)
tracks_out = len(job.meta.get("tracks", []))
jobs_info.append(
{
"id": job.id,
"status": status,
"status": job_status,
"result": job.result,
"tarball": job.meta.get("tarball", None),
"enqueued_at": job.enqueued_at,
"progress": job.meta.get("progress", None),
"tracks": job.meta.get("tracks", None),
"progress": job.meta.get("progress", 0),
"tracks": f"{tracks_out} / {tracks_in}" if isinstance(tracks_in, int) else tracks_out,
"target": job.meta.get("target", None),
}
)
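
Aside (not in this commit): RQ can resolve the whole enqueued_job_ids registry in one round trip; expired or deleted jobs come back as None and can be skipped without a per-id try/except. A sketch, assuming the same redis_conn as above:

from rq.job import Job

job_ids = [jid.decode() for jid in redis_conn.lrange("enqueued_job_ids", 0, -1)]
jobs = [j for j in Job.fetch_many(job_ids, connection=redis_conn) if j is not None]
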

View File

@@ -14,7 +14,7 @@ from rq import get_current_job
from utils.sr_wrapper import SRUtil
# ---------- Config ----------
ROOT_DIR = Path("/storage/music2") # change to your music folder
ROOT_DIR = Path("/storage/music2")
MAX_RETRIES = 3
THROTTLE_MIN = 0.3
THROTTLE_MAX = 1.0
@@ -56,26 +56,21 @@ def sanitize_filename(name: str) -> str:
def ensure_unique_path(p: Path) -> Path:
"""If path exists, append ' (n)' before extension."""
if not p.exists():
return p
"""Always append a short UUID fragment before the extension."""
stem, suffix = p.stem, p.suffix
parent = p.parent
n = 2
while True:
candidate = parent / f"{stem} ({n}){suffix}"
if not candidate.exists():
return candidate
n += 1
short_id = uuid.uuid4().hex[:8]
return parent / f"{stem}_{short_id}{suffix}"
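
For reference, the new ensure_unique_path restated outside the diff so it runs standalone; it assumes uuid and Path are imported at the top of the module (the import hunk shown here does not include them, so that is an assumption about the rest of the file).

from pathlib import Path
import uuid

def ensure_unique_path(p: Path) -> Path:
    """Always append a short UUID fragment before the extension."""
    stem, suffix = p.stem, p.suffix
    short_id = uuid.uuid4().hex[:8]
    return p.parent / f"{stem}_{short_id}{suffix}"

print(ensure_unique_path(Path("/storage/music2/Artist/Album/01 - Song.flac")))
# e.g. /storage/music2/Artist/Album/01 - Song_3f9c2a1e.flac
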
# ---------- Job ----------
def bulk_download(track_list: list, target: str):
def bulk_download(track_list: list):
"""
RQ job:
- fetches stream URLs
- downloads with retries + throttling
- uses SR metadata to name/organize files
- creates ONE tarball for all tracks, with all artist names in the filename
- creates ONE tarball for all tracks
- returns [tarball_path]
"""
job = get_current_job()
@@ -87,7 +82,7 @@ def bulk_download(track_list: list, target: str):
job.meta["tracks"] = [] # will hold per-track dicts
job.meta["progress"] = 0
job.meta["tarball"] = None
job.meta["target"] = target
job.meta["status"] = "started"
job.save_meta()
except Exception as e:
logging.warning("Failed to init job.meta: %s", e)
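
Illustration only (not shown in this commit): with target dropped from the function arguments, a worker that still needs it can read the value written into job.meta at enqueue time.

from rq import get_current_job

def bulk_download(track_list: list):
    job = get_current_job()
    target = job.meta.get("target") if job else None  # set by the enqueue call in the first file
    ...
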
@@ -102,6 +97,9 @@ def bulk_download(track_list: list, target: str):
async with aiohttp.ClientSession(headers=HEADERS) as session:
total = len(track_list or [])
logging.critical("Total tracks to process: %s", total)
if job:
job.meta["progress"] = 0
job.save_meta()
for i, track_id in enumerate(track_list or []):
track_info = {
@@ -164,6 +162,10 @@ def bulk_download(track_list: list, target: str):
track_info["file_path"] = str(final_file)
track_info["error"] = None
all_final_files.append(final_file)
if job:
job.meta["progress"] = int(((i + 1) / total) * 100)
job.save_meta()
break # success; exit retry loop
except Exception as e:
@@ -186,7 +188,6 @@ def bulk_download(track_list: list, target: str):
if job:
try:
job.meta["tracks"] = per_track_meta
job.meta["progress"] = int(((i + 1) / max(total, 1)) * 100)
job.save_meta()
except Exception as e:
logging.warning("Failed to update job.meta after track %s: %s", track_id, e)
@@ -194,7 +195,7 @@ def bulk_download(track_list: list, target: str):
# Throttle between tracks
await asyncio.sleep(random.uniform(THROTTLE_MIN, THROTTLE_MAX))
# ---- Single combined tarball for all tracks ----
# ---- Single combined tarball for all tracks ----
if not all_final_files:
if job:
try:
@@ -230,26 +231,41 @@ def bulk_download(track_list: list, target: str):
final_tarball = ROOT_DIR / "completed" / staged_tarball.name
final_tarball.parent.mkdir(parents=True, exist_ok=True)
if job:
try:
job.meta["status"] = "compressing"
job.save_meta()
except Exception:
pass
with tarfile.open(staged_tarball, "w:gz") as tar:
# Update job status → compressing
logging.info("Creating tarball: %s", staged_tarball)
# Run blocking tar creation in background thread
def _create_tar_sync():
with tarfile.open(staged_tarball, "w:gz") as tar:
for f in all_final_files:
try:
arcname = f.relative_to(ROOT_DIR)
except ValueError:
arcname = f.name
tar.add(f, arcname=str(arcname))
try:
os.remove(f)
except Exception:
pass
await asyncio.to_thread(_create_tar_sync)
# sanity check
if not staged_tarball.exists():
logging.error("Tarball was not created: %s", staged_tarball)
if job:
try:
job.meta["status"] = "compressing"
job.meta["status"] = "compress_failed"
job.save_meta()
except Exception:
pass
logging.info("Creating tarball: %s", staged_tarball)
for f in all_final_files:
try:
arcname = f.relative_to(ROOT_DIR)
except ValueError:
arcname = f.name
tar.add(f, arcname=str(arcname))
try:
os.remove(f)
except Exception:
pass
return []
logging.critical("Tarball created: %s", staged_tarball)
@@ -260,32 +276,11 @@ def bulk_download(track_list: list, target: str):
shutil.move(str(staged_tarball), str(final_tarball))
logging.critical("Tarball finalized: %s", final_tarball)
# Cleanup empty dirs (unchanged)
to_check = set()
for p in all_final_files:
if p.parent:
to_check.add(p.parent)
if p.parent and p.parent.parent:
to_check.add(p.parent.parent)
for d in sorted(to_check, key=lambda p: len(p.parts), reverse=True):
if d.is_dir():
try:
next(d.iterdir())
except StopIteration:
shutil.rmtree(d, ignore_errors=True)
except Exception:
pass
# Update job status → done
if job:
try:
job.meta["tarball"] = str(final_tarball)
job.meta["progress"] = 100
job.meta["status"] = "done"
job.save_meta()
except Exception as e:
logging.warning("Failed to write final status to job.meta: %s", e)
job.meta["tarball"] = str(final_tarball)
job.meta["progress"] = 100
job.meta["status"] = "completed"
job.save_meta()
return [str(final_tarball)]
@@ -294,5 +289,10 @@ def bulk_download(track_list: list, target: str):
asyncio.set_event_loop(loop)
try:
return loop.run_until_complete(process_tracks())
except Exception as e:
if job:
job.meta["status"] = "failed"
job.save_meta()
logging.critical("Exception: %s", str(e))
finally:
loop.close()
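
Minimal sketch of the sync-entrypoint-runs-async-body pattern bulk_download uses. The final raise is my addition, not in the hunk above (which swallows the exception after recording meta); re-raising would let RQ's own failure handling see the error as well.

import asyncio
import logging
from rq import get_current_job

def run_async_job(coro_factory):
    job = get_current_job()
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        return loop.run_until_complete(coro_factory())
    except Exception as e:
        if job:
            job.meta["status"] = "failed"
            job.save_meta()
        logging.critical("Exception: %s", e)
        raise   # assumption: surface the failure to RQ too
    finally:
        loop.close()
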