More progress re: #34

- Change of direction: LRCLib searches from /lyric/search now use an internal cache, which is a PostgreSQL (PGSQL) import of the LRCLib SQLite database. The change to PostgreSQL was made for performance.
This commit is contained in:
2025-11-22 13:13:03 -05:00
parent c302b256d3
commit dcc6c7b24e
10 changed files with 240 additions and 398 deletions

View File

@@ -1,33 +1,25 @@
import time
import traceback
import logging
from typing import Optional, Union
from aiohttp import ClientTimeout, ClientSession
from tenacity import retry, stop_after_attempt, wait_fixed
from typing import Optional
from sqlalchemy.future import select
from lyric_search import utils
from lyric_search.constructors import LyricsResult
from . import common, cache, redis_cache
from lyric_search.constructors import InvalidLRCLibResponseException
from lyric_search.models import Tracks, Lyrics, AsyncSessionLocal
from . import redis_cache
logger = logging.getLogger()
log_level = logging.getLevelName(logger.level)
class LRCLib:
"""LRCLib Search Module"""
"""LRCLib Search Module - Local PostgreSQL Database"""
def __init__(self) -> None:
self.label: str = "LRCLib"
self.lrclib_url: str = "https://lrclib.net/api/search"
self.headers: dict = common.SCRAPE_HEADERS
self.timeout = ClientTimeout(connect=3, sock_read=8)
self.label: str = "LRCLib-Cache"
self.datautils = utils.DataUtils()
self.matcher = utils.TrackMatcher()
self.cache = cache.Cache()
self.redis_cache = redis_cache.RedisCache()
@retry(stop=stop_after_attempt(2), wait=wait_fixed(0.5))
async def search(
self,
artist: str,
@@ -36,10 +28,12 @@ class LRCLib:
duration: Optional[int] = None,
) -> Optional[LyricsResult]:
"""
LRCLib Search
LRCLib Local Database Search
Args:
artist (str): the artist to search
song (str): the song to search
plain (bool): return plain lyrics (True) or synced lyrics (False)
duration (int): optional track duration for better matching
Returns:
Optional[LyricsResult]: The result, if found - None otherwise.
"""
@@ -47,140 +41,115 @@ class LRCLib:
artist = artist.strip().lower()
song = song.strip().lower()
time_start: float = time.time()
lrc_obj: Optional[list[dict]] = None
logging.info("Searching %s - %s on %s", artist, song, self.label)
input_track: str = f"{artist} - {song}"
returned_lyrics: str = ""
async with ClientSession() as client:
async with await client.get(
self.lrclib_url,
params={
"artist_name": artist,
"track_name": song,
**({"duration": duration} if duration else {}),
},
timeout=self.timeout,
headers=self.headers,
) as request:
request.raise_for_status()
text: Optional[str] = await request.text()
if not text:
raise InvalidLRCLibResponseException("No search response.")
if len(text) < 100:
raise InvalidLRCLibResponseException(
"Search response text was invalid (len < 100 chars.)"
)
search_data: Optional[Union[list, dict]] = await request.json()
if not isinstance(search_data, list | dict):
raise InvalidLRCLibResponseException("No JSON search data.")
# logging.info("Search Data:\n%s", search_data)
if not isinstance(search_data, list):
raise InvalidLRCLibResponseException("Invalid JSON.")
# Filter by duration if provided
if duration:
search_data = [
r
for r in search_data
if abs(r.get("duration", 0) - duration) <= 10
]
if plain:
possible_matches = [
(
x,
f"{result.get('artistName')} - {result.get('trackName')}",
)
for x, result in enumerate(search_data)
]
else:
logging.info(
"Limiting possible matches to only those with non-null syncedLyrics"
)
possible_matches = [
(
x,
f"{result.get('artistName')} - {result.get('trackName')}",
)
for x, result in enumerate(search_data)
if isinstance(result["syncedLyrics"], str)
]
best_match = None
try:
match_result = self.matcher.find_best_match(
input_track,
possible_matches, # type: ignore
)
if match_result:
best_match = match_result[0]
except: # noqa
pass
if not best_match:
return
best_match_id = best_match[0]
if not isinstance(search_data[best_match_id]["artistName"], str):
raise InvalidLRCLibResponseException(
f"Invalid JSON: Cannot find artistName key.\n{search_data}"
)
if not isinstance(search_data[best_match_id]["trackName"], str):
raise InvalidLRCLibResponseException(
f"Invalid JSON: Cannot find trackName key.\n{search_data}"
)
returned_artist: str = search_data[best_match_id]["artistName"]
returned_song: str = search_data[best_match_id]["trackName"]
if plain:
if not isinstance(
search_data[best_match_id]["plainLyrics"], str
):
raise InvalidLRCLibResponseException(
f"Invalid JSON: Cannot find plainLyrics key.\n{search_data}"
)
returned_lyrics: str = search_data[best_match_id]["plainLyrics"]
returned_lyrics = self.datautils.scrub_lyrics(returned_lyrics)
else:
if not isinstance(
search_data[best_match_id]["syncedLyrics"], str
):
raise InvalidLRCLibResponseException(
f"Invalid JSON: Cannot find syncedLyrics key.\n{search_data}"
)
returned_lyrics: str = search_data[best_match_id][
"syncedLyrics"
]
lrc_obj = self.datautils.create_lrc_object(returned_lyrics)
returned_track: str = f"{returned_artist} - {returned_song}"
match_result = self.matcher.find_best_match(
input_track=input_track, candidate_tracks=[(0, returned_track)]
async with AsyncSessionLocal() as db:
best_match = None
# Try exact match first (fastest)
result = await db.execute(
select(
Tracks.artist_name,
Tracks.name,
Lyrics.plain_lyrics,
Lyrics.synced_lyrics,
)
if not match_result:
return # No suitable match found
_matched, confidence = match_result
logging.info("Result found on %s", self.label)
time_end: float = time.time()
time_diff: float = time_end - time_start
matched = LyricsResult(
artist=returned_artist,
song=returned_song,
src=self.label,
lyrics=returned_lyrics if plain else lrc_obj, # type: ignore
confidence=confidence,
time=time_diff,
.join(Lyrics, Tracks.id == Lyrics.track_id)
.filter(
Tracks.artist_name_lower == artist,
Tracks.name_lower == song,
)
await self.redis_cache.increment_found_count(self.label)
if plain:
await self.cache.store(matched)
return matched
.limit(1)
)
best_match = result.first()
# If no exact match, try prefix match (faster than full ILIKE)
if not best_match:
result = await db.execute(
select(
Tracks.artist_name,
Tracks.name,
Lyrics.plain_lyrics,
Lyrics.synced_lyrics,
)
.join(Lyrics, Tracks.id == Lyrics.track_id)
.filter(
Tracks.artist_name_lower.like(f"{artist}%"),
Tracks.name_lower.like(f"{song}%"),
)
.limit(1)
)
best_match = result.first()
# If still no match, try full ILIKE (slowest)
if not best_match:
result = await db.execute(
select(
Tracks.artist_name,
Tracks.name,
Lyrics.plain_lyrics,
Lyrics.synced_lyrics,
)
.join(Lyrics, Tracks.id == Lyrics.track_id)
.filter(
Tracks.artist_name_lower.ilike(f"%{artist}%"),
Tracks.name_lower.ilike(f"%{song}%"),
)
.limit(1)
)
best_match = result.first()
if not best_match:
logging.info("No result found on %s", self.label)
return None
returned_artist = best_match.artist_name
returned_song = best_match.name
if plain:
if not best_match.plain_lyrics:
logging.info("No plain lyrics available on %s", self.label)
return None
returned_lyrics = best_match.plain_lyrics
returned_lyrics = self.datautils.scrub_lyrics(returned_lyrics)
lrc_obj = None
else:
if not best_match.synced_lyrics:
logging.info("No synced lyrics available on %s", self.label)
return None
returned_lyrics = best_match.synced_lyrics
lrc_obj = self.datautils.create_lrc_object(returned_lyrics)
# Calculate match confidence
input_track = f"{artist} - {song}"
returned_track = f"{returned_artist} - {returned_song}"
match_result = self.matcher.find_best_match(
input_track=input_track,
candidate_tracks=[(0, returned_track)]
)
if not match_result:
return None
_matched, confidence = match_result
logging.info("Result found on %s", self.label)
time_end = time.time()
time_diff = time_end - time_start
matched = LyricsResult(
artist=returned_artist,
song=returned_song,
src=self.label,
lyrics=returned_lyrics if plain else lrc_obj, # type: ignore
confidence=confidence,
time=time_diff,
)
await self.redis_cache.increment_found_count(self.label)
return matched
except Exception as e:
logging.debug("Exception: %s", str(e))
traceback.print_exc()
logging.error("Exception in %s: %s", self.label, str(e))
return None