Files
api/lyric_search/sources/lrclib.py

156 lines
5.9 KiB
Python
Raw Normal View History

2025-01-15 20:17:49 -05:00
import time
2025-01-14 11:10:13 -05:00
import logging
from typing import Optional
from sqlalchemy.future import select
from lyric_search import utils
from lyric_search.constructors import LyricsResult
from lyric_search.models import Tracks, Lyrics, AsyncSessionLocal
from . import redis_cache
2025-01-14 11:10:13 -05:00
# Handle on the root logger (getLogger() is called without a name).
logger: logging.Logger = logging.getLogger()
# Textual name of the root logger's level, captured once at import time.
log_level: str = logging.getLevelName(logger.level)
class LRCLib:
    """LRCLib Search Module - Local PostgreSQL Database"""

    # Escape character used for LIKE/ILIKE patterns built from user input.
    # "/" is used (rather than a backslash) so the ESCAPE clause does not
    # depend on how the backend treats backslashes in string literals.
    _LIKE_ESCAPE: str = "/"

    def __init__(self) -> None:
        self.label: str = "LRCLib-Cache"
        self.datautils = utils.DataUtils()
        self.matcher = utils.TrackMatcher()
        self.redis_cache = redis_cache.RedisCache()

    @classmethod
    def _escape_like(cls, value: str) -> str:
        """Escape LIKE wildcards (``%``, ``_``) and the escape char in *value*."""
        esc = cls._LIKE_ESCAPE
        # The escape character itself must be doubled first, otherwise the
        # escapes inserted for "%" and "_" would be escaped a second time.
        return (
            value.replace(esc, esc + esc)
            .replace("%", esc + "%")
            .replace("_", esc + "_")
        )

    @staticmethod
    async def _first_match(db, *conditions):
        """Return the first track/lyrics row satisfying all *conditions*, or None."""
        result = await db.execute(
            select(
                Tracks.artist_name,
                Tracks.name,
                Lyrics.plain_lyrics,
                Lyrics.synced_lyrics,
            )
            .join(Lyrics, Tracks.id == Lyrics.track_id)
            .filter(*conditions)
            .limit(1)
        )
        return result.first()

    async def search(
        self,
        artist: str,
        song: str,
        plain: Optional[bool] = True,
        duration: Optional[int] = None,
    ) -> Optional[LyricsResult]:
        """
        LRCLib Local Database Search

        The lookup runs up to three queries, stopping at the first one that
        returns a row: exact match on the lowercased artist/title columns,
        then a prefix match, then a case-insensitive substring match.
        ``%`` and ``_`` in the input are escaped, so they match literally
        instead of acting as SQL wildcards.

        Args:
            artist (str): the artist to search
            song (str): the song to search
            plain (bool): return plain lyrics (True) or synced lyrics (False)
            duration (int): optional track duration; accepted for interface
                compatibility but not currently used by the lookup

        Returns:
            Optional[LyricsResult]: The result, if found - None otherwise.
            None is also returned when the matched row lacks the requested
            lyrics type, when the matcher rejects the candidate, or when any
            exception is raised during the search (the exception is logged).
        """
        try:
            artist = artist.strip().lower()
            song = song.strip().lower()
            time_start: float = time.time()
            logging.info("Searching %s - %s on %s", artist, song, self.label)

            esc = self._LIKE_ESCAPE
            artist_pattern = self._escape_like(artist)
            song_pattern = self._escape_like(song)

            # Filter sets ordered from cheapest/strictest to slowest/loosest.
            stages = (
                # Exact match first (fastest)
                (
                    Tracks.artist_name_lower == artist,
                    Tracks.name_lower == song,
                ),
                # Prefix match (faster than full ILIKE)
                (
                    Tracks.artist_name_lower.like(f"{artist_pattern}%", escape=esc),
                    Tracks.name_lower.like(f"{song_pattern}%", escape=esc),
                ),
                # Full ILIKE substring match (slowest)
                (
                    Tracks.artist_name_lower.ilike(f"%{artist_pattern}%", escape=esc),
                    Tracks.name_lower.ilike(f"%{song_pattern}%", escape=esc),
                ),
            )

            best_match = None
            # Only plain column values are selected, so the row stays usable
            # after the session is closed; release the session early.
            async with AsyncSessionLocal() as db:
                for conditions in stages:
                    best_match = await self._first_match(db, *conditions)
                    if best_match is not None:
                        break

            if best_match is None:
                logging.info("No result found on %s", self.label)
                return None

            returned_artist = best_match.artist_name
            returned_song = best_match.name

            if plain:
                if not best_match.plain_lyrics:
                    logging.info("No plain lyrics available on %s", self.label)
                    return None
                lyrics = self.datautils.scrub_lyrics(best_match.plain_lyrics)
            else:
                if not best_match.synced_lyrics:
                    logging.info("No synced lyrics available on %s", self.label)
                    return None
                lyrics = self.datautils.create_lrc_object(best_match.synced_lyrics)

            # Calculate match confidence
            match_result = self.matcher.find_best_match(
                input_track=f"{artist} - {song}",
                candidate_tracks=[(0, f"{returned_artist} - {returned_song}")],
            )
            if not match_result:
                return None
            _matched, confidence = match_result

            logging.info("Result found on %s", self.label)
            time_diff = time.time() - time_start
            matched = LyricsResult(
                artist=returned_artist,
                song=returned_song,
                src=self.label,
                lyrics=lyrics,  # type: ignore
                confidence=confidence,
                time=time_diff,
            )

            # The hit counter is bookkeeping only: a failure here must not
            # throw away lyrics that were already found.
            try:
                await self.redis_cache.increment_found_count(self.label)
            except Exception as e:
                logging.warning(
                    "Failed to increment found count for %s: %s", self.label, e
                )
            return matched
        except Exception as e:
            # Top-level boundary for this source: log with traceback and
            # report "not found" so the caller can continue.
            logging.exception("Exception in %s: %s", self.label, e)
            return None