lighting: 2FA<->GE changes, improved connection management
lrclib (lyric_search/sources): performance improvements
This commit is contained in:
@@ -1,6 +1,8 @@
|
||||
import re
|
||||
import time
|
||||
import logging
|
||||
from typing import Optional
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.future import select
|
||||
from lyric_search import utils
|
||||
from lyric_search.constructors import LyricsResult
|
||||
@@ -11,6 +13,37 @@ logger = logging.getLogger()
|
||||
log_level = logging.getLevelName(logger.level)
|
||||
|
||||
|
||||
def normalize_for_search(s: str) -> str:
    """
    Normalize a string for better matching.

    Removes common variations that cause exact match failures. The steps
    are applied in order:

    1. Lowercase and strip surrounding whitespace.
    2. Drop parenthetical content, e.g. "(Remastered)", "(feat. X)".
    3. Drop bracketed content, e.g. "[Explicit]", "[Deluxe Edition]".
    4. Drop a standalone "feat" / "feat." / "ft" / "ft." / "featuring"
       marker and everything after it.
    5. Drop a leading "the ".
    6. Replace "&" with "and".
    7. Remove every character that is neither a word character nor
       whitespace.
    8. Collapse runs of whitespace to a single space and strip.

    Args:
        s (str): the raw artist or song string

    Returns:
        str: the normalized string (may be empty if nothing survives)
    """
    s = s.lower().strip()

    # Remove parenthetical content: (Remastered), (feat. X), (2020 Remix), etc.
    s = re.sub(r'\s*\([^)]*\)\s*', ' ', s)

    # Remove bracketed content: [Explicit], [Deluxe Edition], etc.
    s = re.sub(r'\s*\[[^\]]*\]\s*', ' ', s)

    # Remove "feat.", "ft.", "featuring" and everything after.
    # The \b anchors are required: without them "ft" also matches inside
    # ordinary words followed by a space, so "daft punk" was cut to "da"
    # and "left of center" to "le".
    s = re.sub(
        r'\s*\b(?:featuring|feat|ft)\b\.?\s+.*$',
        '',
        s,
        flags=re.IGNORECASE,
    )

    # Remove "The " prefix from artist names
    s = re.sub(r'^the\s+', '', s)

    # Normalize & to "and"
    s = re.sub(r'\s*&\s*', ' and ', s)

    # Remove punctuation except spaces
    s = re.sub(r"[^\w\s]", '', s)

    # Collapse multiple spaces
    s = re.sub(r'\s+', ' ', s).strip()

    return s
|
||||
|
||||
|
||||
class LRCLib:
|
||||
"""LRCLib Search Module - Local PostgreSQL Database"""
|
||||
|
||||
@@ -30,7 +63,13 @@ class LRCLib:
|
||||
raw: bool = False,
|
||||
) -> Optional[LyricsResult]:
|
||||
"""
|
||||
LRCLib Local Database Search
|
||||
LRCLib Local Database Search with normalization and smart fallback.
|
||||
|
||||
Search strategy:
|
||||
1. Exact match on lowercased input (fastest, ~0.1ms)
|
||||
2. Exact match on normalized input (fast, ~0.1ms)
|
||||
3. Artist trigram + song exact within results (medium, ~50-200ms)
|
||||
|
||||
Args:
|
||||
artist (str): the artist to search
|
||||
song (str): the song to search
|
||||
@@ -41,8 +80,8 @@ class LRCLib:
|
||||
Optional[LyricsResult]: The result, if found - None otherwise.
|
||||
"""
|
||||
try:
|
||||
artist = artist.strip().lower()
|
||||
song = song.strip().lower()
|
||||
artist_lower = artist.strip().lower()
|
||||
song_lower = song.strip().lower()
|
||||
time_start: float = time.time()
|
||||
|
||||
logging.info("Searching %s - %s on %s", artist, song, self.label)
|
||||
@@ -50,7 +89,7 @@ class LRCLib:
|
||||
async with AsyncSessionLocal() as db:
|
||||
best_match = None
|
||||
|
||||
# Try exact match first (fastest)
|
||||
# Strategy 1: Exact match on raw lowercase (fastest)
|
||||
result = await db.execute(
|
||||
select(
|
||||
Tracks.artist_name,
|
||||
@@ -60,33 +99,41 @@ class LRCLib:
|
||||
)
|
||||
.join(Lyrics, Tracks.id == Lyrics.track_id)
|
||||
.filter(
|
||||
Tracks.artist_name_lower == artist,
|
||||
Tracks.name_lower == song,
|
||||
Tracks.artist_name_lower == artist_lower,
|
||||
Tracks.name_lower == song_lower,
|
||||
)
|
||||
.limit(1)
|
||||
)
|
||||
best_match = result.first()
|
||||
|
||||
# If no exact match, try prefix match (faster than full ILIKE)
|
||||
# Strategy 2: Exact match on normalized input
|
||||
if not best_match:
|
||||
result = await db.execute(
|
||||
select(
|
||||
Tracks.artist_name,
|
||||
Tracks.name,
|
||||
Lyrics.plain_lyrics,
|
||||
Lyrics.synced_lyrics,
|
||||
artist_norm = normalize_for_search(artist)
|
||||
song_norm = normalize_for_search(song)
|
||||
|
||||
if artist_norm != artist_lower or song_norm != song_lower:
|
||||
result = await db.execute(
|
||||
select(
|
||||
Tracks.artist_name,
|
||||
Tracks.name,
|
||||
Lyrics.plain_lyrics,
|
||||
Lyrics.synced_lyrics,
|
||||
)
|
||||
.join(Lyrics, Tracks.id == Lyrics.track_id)
|
||||
.filter(
|
||||
Tracks.artist_name_lower == artist_norm,
|
||||
Tracks.name_lower == song_norm,
|
||||
)
|
||||
.limit(1)
|
||||
)
|
||||
.join(Lyrics, Tracks.id == Lyrics.track_id)
|
||||
.filter(
|
||||
Tracks.artist_name_lower.like(f"{artist}%"),
|
||||
Tracks.name_lower.like(f"{song}%"),
|
||||
)
|
||||
.limit(1)
|
||||
)
|
||||
best_match = result.first()
|
||||
best_match = result.first()
|
||||
|
||||
# If still no match, try full ILIKE (slowest)
|
||||
# Strategy 3: Normalized artist with song prefix match
|
||||
# Catches cases like "Song (Remastered)" when DB has "Song"
|
||||
if not best_match:
|
||||
artist_norm = normalize_for_search(artist)
|
||||
song_norm = normalize_for_search(song)
|
||||
|
||||
result = await db.execute(
|
||||
select(
|
||||
Tracks.artist_name,
|
||||
@@ -96,8 +143,8 @@ class LRCLib:
|
||||
)
|
||||
.join(Lyrics, Tracks.id == Lyrics.track_id)
|
||||
.filter(
|
||||
Tracks.artist_name_lower.ilike(f"%{artist}%"),
|
||||
Tracks.name_lower.ilike(f"%{song}%"),
|
||||
Tracks.artist_name_lower == artist_norm,
|
||||
Tracks.name_lower.like(f"{song_norm}%"),
|
||||
)
|
||||
.limit(1)
|
||||
)
|
||||
@@ -134,10 +181,7 @@ class LRCLib:
|
||||
input_track=input_track, candidate_tracks=[(0, returned_track)]
|
||||
)
|
||||
|
||||
if not match_result:
|
||||
return None
|
||||
|
||||
_matched, confidence = match_result
|
||||
confidence = match_result[1] if match_result else 85
|
||||
|
||||
logging.info("Result found on %s", self.label)
|
||||
time_end = time.time()
|
||||
|
||||
Reference in New Issue
Block a user