- Implemented a new script to upsert data from SQLite dumps into PostgreSQL asynchronously.

- Added detailed reporting for each table processed, including success and error statuses.
- Integrated Discord notifications for various stages of the upsert process.
- Included functionality to fetch the latest SQLite dump from a specified URL.
- Added error handling and state management for the upsert process.

Resolves #34
2026-01-25 22:06:24 -05:00
parent 97fd7dd67d
commit 277804d212
8 changed files with 2136 additions and 27 deletions
--- a/endpoints/lyric_search.py
+++ b/endpoints/lyric_search.py
@@ -245,9 +245,9 @@ class LyricSearch(FastAPI):
                        if i + line_count <= len(lyric_lines):
                            # Combine consecutive lines with space separator
                            combined_lines = []
-                            line_positions: list[
+                            line_positions: list[tuple[int, int]] = (
-                                tuple[int, int]
+                                []
-                            ] = []  # Track where each line starts in combined text
+                            )  # Track where each line starts in combined text
                            combined_text_parts: list[str] = []
                            for j in range(line_count):
--- a/lyric_search/models.py
+++ b/lyric_search/models.py
@@ -99,7 +99,9 @@ POSTGRES_PASSWORD = os.getenv("POSTGRES_PASSWORD", "")
 # URL-encode the password to handle special characters
 encoded_password = urllib.parse.quote_plus(POSTGRES_PASSWORD)
-DATABASE_URL: str = f"postgresql+asyncpg://{POSTGRES_USER}:{encoded_password}@{POSTGRES_HOST}:{POSTGRES_PORT}/{POSTGRES_DB}"
+DATABASE_URL: str = (
    f"postgresql+asyncpg://{POSTGRES_USER}:{encoded_password}@{POSTGRES_HOST}:{POSTGRES_PORT}/{POSTGRES_DB}"
 )
 async_engine: AsyncEngine = create_async_engine(
    DATABASE_URL, pool_size=20, max_overflow=10, pool_pre_ping=True, echo=False
 )
--- a/lyric_search/sources/cache.py
+++ b/lyric_search/sources/cache.py
@@ -91,8 +91,10 @@ class Cache:
        logging.debug(
            "Checking whether %s is already stored", artistsong.replace("\n", " - ")
        )
-        check_query: str = 'SELECT id, artist, song FROM lyrics  WHERE editdist3((lower(artist) || " " || lower(song)), (? || " " || ?))\
+        check_query: str = (
            'SELECT id, artist, song FROM lyrics  WHERE editdist3((lower(artist) || " " || lower(song)), (? || " " || ?))\
                        <= 410 ORDER BY editdist3((lower(artist) || " " || lower(song)), ?) ASC LIMIT 1'
        )
        artistsong_split = artistsong.split("\n", maxsplit=1)
        artist = artistsong_split[0].lower()
        song = artistsong_split[1].lower()
@@ -213,10 +215,8 @@ class Cache:
            lyrics = regex.sub(r"(<br>|\n|\r\n)", " / ", lyr_result.lyrics.strip())
            lyrics = regex.sub(r"\s{2,}", " ", lyrics)
-            insert_query = (
+            insert_query = "INSERT INTO lyrics (src, date_retrieved, artist, song, artistsong, confidence, lyrics)\
                "INSERT INTO lyrics (src, date_retrieved, artist, song, artistsong, confidence, lyrics)\
                VALUES(?, ?, ?, ?, ?, ?, ?)"
            )
            params = (
                lyr_result.src,
                time.time(),
@@ -260,8 +260,10 @@ class Cache:
            if artist == "!" and song == "!":
                random_search = True
-                search_query: str = "SELECT id, artist, song, lyrics, src, confidence\
+                search_query: str = (
                    "SELECT id, artist, song, lyrics, src, confidence\
                FROM lyrics ORDER BY RANDOM() LIMIT 1"
                )
            logging.info("Searching %s - %s on %s", artist, song, self.label)
@@ -320,9 +322,11 @@ class Cache:
                    self.cache_pre_query
                ) as _db_cursor:
                    if not random_search:
-                        search_query: str = 'SELECT id, artist, song, lyrics, src, confidence FROM lyrics\
+                        search_query: str = (
                            'SELECT id, artist, song, lyrics, src, confidence FROM lyrics\
                        WHERE editdist3((lower(artist) || " " || lower(song)), (? || " " || ?))\
                        <= 410 ORDER BY editdist3((lower(artist) || " " || lower(song)), ?) ASC LIMIT 10'
                        )
                        search_params: tuple = (
                            artist.strip(),
                            song.strip(),
--- a/lyric_search/utils.py
+++ b/lyric_search/utils.py
@@ -111,8 +111,7 @@ class DataUtils:
    """
    def __init__(self) -> None:
-        self.lrc_regex = (
+        self.lrc_regex = regex.compile(  # capture mm:ss and optional .xxx, then the lyric text
            regex.compile(  # capture mm:ss and optional .xxx, then the lyric text
            r"""
            \[                 # literal “[”
            (                # 1st (and only) capture group:
@@ -126,7 +125,6 @@ class DataUtils:
            """,
            regex.VERBOSE,
        )
        )
        self.scrub_regex_1: Pattern = regex.compile(r"(\[.*?\])(\s){0,}(\:){0,1}")
        self.scrub_regex_2: Pattern = regex.compile(
            r"(\d?)(Embed\b)", flags=regex.IGNORECASE
--- a/shared.py
+++ b/shared.py
@@ -92,11 +92,7 @@ def get_redis_sync_client(decode_responses: bool = True) -> redis_sync.Redis:
 async def close_redis_pools() -> None:
    """Close Redis connections. Call on app shutdown."""
-    global \
+    global _redis_async_pool, _redis_async_client, _redis_sync_client, _redis_sync_client_decoded
        _redis_async_pool, \
        _redis_async_client, \
        _redis_sync_client, \
        _redis_sync_client_decoded
    if _redis_async_client:
        await _redis_async_client.close()
--- a/update_lrclib_db.py
+++ b/update_lrclib_db.py
--- a/utils/meme_util.py
+++ b/utils/meme_util.py
@@ -127,7 +127,9 @@ class MemeUtil:
            db_conn.row_factory = sqlite3.Row
            rows_per_page: int = 10
            offset: int = (page - 1) * rows_per_page
-            query: str = "SELECT id, timestamp FROM memes ORDER BY timestamp DESC LIMIT 10 OFFSET ?"
+            query: str = (
                "SELECT id, timestamp FROM memes ORDER BY timestamp DESC LIMIT 10 OFFSET ?"
            )
            async with await db_conn.execute(query, (offset,)) as db_cursor:
                results = await db_cursor.fetchall()
                for result in results:
--- a/utils/sr_wrapper.py
+++ b/utils/sr_wrapper.py
@@ -1269,9 +1269,11 @@ class SRUtil:
            album,
            song,
            selected_artist,
            (
                best_track.get("album", {}).get("title")
                if best_track.get("album")
-            else None,
+                else None
            ),
            selected_title,
        ):
            # Try to find another candidate that does match metadata
@@ -1294,9 +1296,11 @@ class SRUtil:
                    album,
                    song,
                    cand_artist,
                    (
                        candidate.get("album", {}).get("title")
                        if candidate.get("album")
-                    else None,
+                        else None
                    ),
                    cand_title,
                ):
                    found_better = candidate