lighting: 2FA<->GE changes, improved connection management

lrclib (lyric_search/sources): performance improvements
2026-02-07 09:33:16 -05:00
parent ea88fd0303
commit 435fcc3b2e
2 changed files with 411 additions and 66 deletions
--- a/lyric_search/sources/lrclib.py
+++ b/lyric_search/sources/lrclib.py
@@ -1,6 +1,8 @@
+import re
 import time
 import logging
 from typing import Optional
+from sqlalchemy import text
 from sqlalchemy.future import select
 from lyric_search import utils
 from lyric_search.constructors import LyricsResult
@@ -11,6 +13,37 @@ logger = logging.getLogger()
 log_level = logging.getLevelName(logger.level)


+def normalize_for_search(s: str) -> str:
+    """
+    Normalize an artist or song name for tolerant exact-match lookups.
+
+    Args:
+        s (str): the raw artist or song name
+    Returns:
+        str: lowercased text with parenthetical/bracketed content, any
+            trailing "feat."/"ft."/"featuring" credit, a leading "the",
+            and punctuation removed; "&" becomes "and" and whitespace
+            runs collapse to one space. May be empty.
+    """
+    s = s.lower().strip()
+    # Remove parenthetical content: (Remastered), (feat. X), (2020 Remix), etc.
+    s = re.sub(r'\s*\([^)]*\)\s*', ' ', s)
+    # Remove bracketed content: [Explicit], [Deluxe Edition], etc.
+    s = re.sub(r'\s*\[[^\]]*\]\s*', ' ', s)
+    # Remove "feat.", "ft.", "featuring" and everything after. The \b keeps
+    # words that merely end in those letters ("daft punk", "defeat") intact.
+    s = re.sub(r'\s*\b(feat\.?|ft\.?|featuring)\s+.*$', '', s, flags=re.IGNORECASE)
+    # Remove "The " prefix from artist names; strip first because removing a
+    # leading "(...)" or "[...]" above leaves a space in front of "the".
+    s = re.sub(r'^the\s+', '', s.strip())
+    # Normalize & to "and"
+    s = re.sub(r'\s*&\s*', ' and ', s)
+    # Remove punctuation except spaces
+    s = re.sub(r"[^\w\s]", '', s)
+    # Collapse multiple spaces
+    return re.sub(r'\s+', ' ', s).strip()
+
+
 class LRCLib:
    """LRCLib Search Module - Local PostgreSQL Database"""

@@ -30,7 +63,13 @@ class LRCLib:
        raw: bool = False,
    ) -> Optional[LyricsResult]:
        """
-        LRCLib Local Database Search
+        LRCLib Local Database Search with normalization and smart fallback.
+        
+        Search strategy:
+        1. Exact match on lowercased input (fastest, ~0.1ms)
+        2. Exact match on normalized input (fast, ~0.1ms)  
+        3. Exact match on normalized artist + prefix match (LIKE 'song%') on normalized song
+        
        Args:
            artist (str): the artist to search
            song (str): the song to search
@@ -41,8 +80,8 @@ class LRCLib:
            Optional[LyricsResult]: The result, if found - None otherwise.
        """
        try:
-            artist = artist.strip().lower()
-            song = song.strip().lower()
+            artist_lower = artist.strip().lower()
+            song_lower = song.strip().lower()
            time_start: float = time.time()

            logging.info("Searching %s - %s on %s", artist, song, self.label)
@@ -50,7 +89,7 @@ class LRCLib:
            async with AsyncSessionLocal() as db:
                best_match = None

-                # Try exact match first (fastest)
+                # Strategy 1: Exact match on raw lowercase (fastest)
                result = await db.execute(
                    select(
                        Tracks.artist_name,
@@ -60,33 +99,41 @@ class LRCLib:
                    )
                    .join(Lyrics, Tracks.id == Lyrics.track_id)
                    .filter(
-                        Tracks.artist_name_lower == artist,
-                        Tracks.name_lower == song,
+                        Tracks.artist_name_lower == artist_lower,
+                        Tracks.name_lower == song_lower,
                    )
                    .limit(1)
                )
                best_match = result.first()

-                # If no exact match, try prefix match (faster than full ILIKE)
+                # Strategy 2: Exact match on normalized input
                if not best_match:
-                    result = await db.execute(
-                        select(
-                            Tracks.artist_name,
-                            Tracks.name,
-                            Lyrics.plain_lyrics,
-                            Lyrics.synced_lyrics,
+                    artist_norm = normalize_for_search(artist)
+                    song_norm = normalize_for_search(song)
+                    
+                    if artist_norm != artist_lower or song_norm != song_lower:
+                        result = await db.execute(
+                            select(
+                                Tracks.artist_name,
+                                Tracks.name,
+                                Lyrics.plain_lyrics,
+                                Lyrics.synced_lyrics,
+                            )
+                            .join(Lyrics, Tracks.id == Lyrics.track_id)
+                            .filter(
+                                Tracks.artist_name_lower == artist_norm,
+                                Tracks.name_lower == song_norm,
+                            )
+                            .limit(1)
                        )
-                        .join(Lyrics, Tracks.id == Lyrics.track_id)
-                        .filter(
-                            Tracks.artist_name_lower.like(f"{artist}%"),
-                            Tracks.name_lower.like(f"{song}%"),
-                        )
-                        .limit(1)
-                    )
-                    best_match = result.first()
+                        best_match = result.first()

-                # If still no match, try full ILIKE (slowest)
+                # Strategy 3: Normalized artist with song prefix match
+                # Catches cases like "Song (Remastered)" when DB has "Song"
                if not best_match:
+                    artist_norm = normalize_for_search(artist)
+                    song_norm = normalize_for_search(song)
+                    
                    result = await db.execute(
                        select(
                            Tracks.artist_name,
@@ -96,8 +143,8 @@ class LRCLib:
                        )
                        .join(Lyrics, Tracks.id == Lyrics.track_id)
                        .filter(
-                            Tracks.artist_name_lower.ilike(f"%{artist}%"),
-                            Tracks.name_lower.ilike(f"%{song}%"),
+                            Tracks.artist_name_lower == artist_norm,
+                            Tracks.name_lower.like(f"{song_norm}%"),
                        )
                        .limit(1)
                    )
@@ -134,10 +181,7 @@ class LRCLib:
                    input_track=input_track, candidate_tracks=[(0, returned_track)]
                )

-                if not match_result:
-                    return None
-
-                _matched, confidence = match_result
+                confidence = match_result[1] if match_result else 85

                logging.info("Result found on %s", self.label)
                time_end = time.time()