More progress re: #34
- Change of direction: LRCLib searches from /lyric/search now use an internal cache, which is a PostgreSQL import of the LRCLib SQLite database. The switch to PostgreSQL was made for performance.
This commit is contained in:
@@ -14,9 +14,7 @@ class Aggregate:
|
||||
Aggregate all source methods
|
||||
"""
|
||||
|
||||
def __init__(self, exclude_methods=None) -> None:
|
||||
if not exclude_methods:
|
||||
exclude_methods: list = []
|
||||
def __init__(self, exclude_methods: list = []) -> None:
|
||||
self.exclude_methods = exclude_methods
|
||||
self.redis_cache = redis_cache.RedisCache()
|
||||
self.notifier = notifier.DiscordNotifier()
|
||||
@@ -70,14 +68,14 @@ class Aggregate:
|
||||
if plain: # do not record LRC fails
|
||||
try:
|
||||
await self.redis_cache.increment_found_count("failed")
|
||||
self.notifier.send(
|
||||
await self.notifier.send(
|
||||
"WARNING",
|
||||
f"Could not find {artist} - {song} via queried sources.",
|
||||
)
|
||||
except Exception as e:
|
||||
traceback.print_exc()
|
||||
logging.info("Could not increment redis failed counter: %s", str(e))
|
||||
self.notifier.send(
|
||||
await self.notifier.send(
|
||||
f"ERROR @ {__file__.rsplit('/', maxsplit=1)[-1]}",
|
||||
f"Could not increment redis failed counter: {str(e)}",
|
||||
)
|
||||
|
||||
@@ -45,11 +45,11 @@ class Genius:
|
||||
Optional[LyricsResult]: The result, if found - None otherwise.
|
||||
"""
|
||||
try:
|
||||
artist = artist.strip().lower()
|
||||
song = song.strip().lower()
|
||||
artist_name = artist.strip().lower()
|
||||
song_name = song.strip().lower()
|
||||
time_start: float = time.time()
|
||||
logging.info("Searching %s - %s on %s", artist, song, self.label)
|
||||
search_term: str = f"{artist}%20{song}"
|
||||
logging.info("Searching %s - %s on %s", artist_name, song_name, self.label)
|
||||
search_term: str = f"{artist_name}%20{song_name}"
|
||||
returned_lyrics: str = ""
|
||||
async with ClientSession() as client:
|
||||
async with client.get(
|
||||
@@ -100,10 +100,13 @@ class Genius:
|
||||
)
|
||||
for returned in possible_matches
|
||||
]
|
||||
searched: str = f"{artist} - {song}"
|
||||
best_match: tuple = self.matcher.find_best_match(
|
||||
searched: str = f"{artist_name} - {song_name}"
|
||||
best_match: Optional[tuple] = self.matcher.find_best_match(
|
||||
input_track=searched, candidate_tracks=to_scrape
|
||||
)
|
||||
if not best_match:
|
||||
raise InvalidGeniusResponseException("No matching result")
|
||||
|
||||
logging.info("To scrape: %s", to_scrape)
|
||||
((scrape_stub, track), confidence) = best_match
|
||||
scrape_url: str = f"{self.genius_url}{scrape_stub[1:]}"
|
||||
@@ -157,8 +160,8 @@ class Genius:
|
||||
returned_lyrics: str = self.datautils.scrub_lyrics(
|
||||
returned_lyrics
|
||||
)
|
||||
artist: str = track.split(" - ", maxsplit=1)[0]
|
||||
song: str = track.split(" - ", maxsplit=1)[1]
|
||||
artist = track.split(" - ", maxsplit=1)[0]
|
||||
song = track.split(" - ", maxsplit=1)[1]
|
||||
logging.info("Result found on %s", self.label)
|
||||
time_end: float = time.time()
|
||||
time_diff: float = time_end - time_start
|
||||
|
||||
@@ -1,33 +1,25 @@
|
||||
import time
|
||||
|
||||
import traceback
|
||||
import logging
|
||||
from typing import Optional, Union
|
||||
from aiohttp import ClientTimeout, ClientSession
|
||||
from tenacity import retry, stop_after_attempt, wait_fixed
|
||||
from typing import Optional
|
||||
from sqlalchemy.future import select
|
||||
from lyric_search import utils
|
||||
from lyric_search.constructors import LyricsResult
|
||||
from . import common, cache, redis_cache
|
||||
from lyric_search.constructors import InvalidLRCLibResponseException
|
||||
from lyric_search.models import Tracks, Lyrics, AsyncSessionLocal
|
||||
from . import redis_cache
|
||||
|
||||
logger = logging.getLogger()
|
||||
log_level = logging.getLevelName(logger.level)
|
||||
|
||||
|
||||
class LRCLib:
|
||||
"""LRCLib Search Module"""
|
||||
"""LRCLib Search Module - Local PostgreSQL Database"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self.label: str = "LRCLib"
|
||||
self.lrclib_url: str = "https://lrclib.net/api/search"
|
||||
self.headers: dict = common.SCRAPE_HEADERS
|
||||
self.timeout = ClientTimeout(connect=3, sock_read=8)
|
||||
self.label: str = "LRCLib-Cache"
|
||||
self.datautils = utils.DataUtils()
|
||||
self.matcher = utils.TrackMatcher()
|
||||
self.cache = cache.Cache()
|
||||
self.redis_cache = redis_cache.RedisCache()
|
||||
|
||||
@retry(stop=stop_after_attempt(2), wait=wait_fixed(0.5))
|
||||
async def search(
|
||||
self,
|
||||
artist: str,
|
||||
@@ -36,10 +28,12 @@ class LRCLib:
|
||||
duration: Optional[int] = None,
|
||||
) -> Optional[LyricsResult]:
|
||||
"""
|
||||
LRCLib Search
|
||||
LRCLib Local Database Search
|
||||
Args:
|
||||
artist (str): the artist to search
|
||||
song (str): the song to search
|
||||
plain (bool): return plain lyrics (True) or synced lyrics (False)
|
||||
duration (int): optional track duration for better matching
|
||||
Returns:
|
||||
Optional[LyricsResult]: The result, if found - None otherwise.
|
||||
"""
|
||||
@@ -47,140 +41,115 @@ class LRCLib:
|
||||
artist = artist.strip().lower()
|
||||
song = song.strip().lower()
|
||||
time_start: float = time.time()
|
||||
lrc_obj: Optional[list[dict]] = None
|
||||
|
||||
logging.info("Searching %s - %s on %s", artist, song, self.label)
|
||||
|
||||
input_track: str = f"{artist} - {song}"
|
||||
returned_lyrics: str = ""
|
||||
async with ClientSession() as client:
|
||||
async with await client.get(
|
||||
self.lrclib_url,
|
||||
params={
|
||||
"artist_name": artist,
|
||||
"track_name": song,
|
||||
**({"duration": duration} if duration else {}),
|
||||
},
|
||||
timeout=self.timeout,
|
||||
headers=self.headers,
|
||||
) as request:
|
||||
request.raise_for_status()
|
||||
|
||||
text: Optional[str] = await request.text()
|
||||
if not text:
|
||||
raise InvalidLRCLibResponseException("No search response.")
|
||||
if len(text) < 100:
|
||||
raise InvalidLRCLibResponseException(
|
||||
"Search response text was invalid (len < 100 chars.)"
|
||||
)
|
||||
|
||||
search_data: Optional[Union[list, dict]] = await request.json()
|
||||
if not isinstance(search_data, list | dict):
|
||||
raise InvalidLRCLibResponseException("No JSON search data.")
|
||||
|
||||
# logging.info("Search Data:\n%s", search_data)
|
||||
|
||||
if not isinstance(search_data, list):
|
||||
raise InvalidLRCLibResponseException("Invalid JSON.")
|
||||
|
||||
# Filter by duration if provided
|
||||
if duration:
|
||||
search_data = [
|
||||
r
|
||||
for r in search_data
|
||||
if abs(r.get("duration", 0) - duration) <= 10
|
||||
]
|
||||
|
||||
if plain:
|
||||
possible_matches = [
|
||||
(
|
||||
x,
|
||||
f"{result.get('artistName')} - {result.get('trackName')}",
|
||||
)
|
||||
for x, result in enumerate(search_data)
|
||||
]
|
||||
else:
|
||||
logging.info(
|
||||
"Limiting possible matches to only those with non-null syncedLyrics"
|
||||
)
|
||||
possible_matches = [
|
||||
(
|
||||
x,
|
||||
f"{result.get('artistName')} - {result.get('trackName')}",
|
||||
)
|
||||
for x, result in enumerate(search_data)
|
||||
if isinstance(result["syncedLyrics"], str)
|
||||
]
|
||||
|
||||
best_match = None
|
||||
try:
|
||||
match_result = self.matcher.find_best_match(
|
||||
input_track,
|
||||
possible_matches, # type: ignore
|
||||
)
|
||||
if match_result:
|
||||
best_match = match_result[0]
|
||||
except: # noqa
|
||||
pass
|
||||
|
||||
if not best_match:
|
||||
return
|
||||
best_match_id = best_match[0]
|
||||
|
||||
if not isinstance(search_data[best_match_id]["artistName"], str):
|
||||
raise InvalidLRCLibResponseException(
|
||||
f"Invalid JSON: Cannot find artistName key.\n{search_data}"
|
||||
)
|
||||
|
||||
if not isinstance(search_data[best_match_id]["trackName"], str):
|
||||
raise InvalidLRCLibResponseException(
|
||||
f"Invalid JSON: Cannot find trackName key.\n{search_data}"
|
||||
)
|
||||
|
||||
returned_artist: str = search_data[best_match_id]["artistName"]
|
||||
returned_song: str = search_data[best_match_id]["trackName"]
|
||||
if plain:
|
||||
if not isinstance(
|
||||
search_data[best_match_id]["plainLyrics"], str
|
||||
):
|
||||
raise InvalidLRCLibResponseException(
|
||||
f"Invalid JSON: Cannot find plainLyrics key.\n{search_data}"
|
||||
)
|
||||
returned_lyrics: str = search_data[best_match_id]["plainLyrics"]
|
||||
returned_lyrics = self.datautils.scrub_lyrics(returned_lyrics)
|
||||
else:
|
||||
if not isinstance(
|
||||
search_data[best_match_id]["syncedLyrics"], str
|
||||
):
|
||||
raise InvalidLRCLibResponseException(
|
||||
f"Invalid JSON: Cannot find syncedLyrics key.\n{search_data}"
|
||||
)
|
||||
returned_lyrics: str = search_data[best_match_id][
|
||||
"syncedLyrics"
|
||||
]
|
||||
lrc_obj = self.datautils.create_lrc_object(returned_lyrics)
|
||||
returned_track: str = f"{returned_artist} - {returned_song}"
|
||||
match_result = self.matcher.find_best_match(
|
||||
input_track=input_track, candidate_tracks=[(0, returned_track)]
|
||||
async with AsyncSessionLocal() as db:
|
||||
best_match = None
|
||||
|
||||
# Try exact match first (fastest)
|
||||
result = await db.execute(
|
||||
select(
|
||||
Tracks.artist_name,
|
||||
Tracks.name,
|
||||
Lyrics.plain_lyrics,
|
||||
Lyrics.synced_lyrics,
|
||||
)
|
||||
if not match_result:
|
||||
return # No suitable match found
|
||||
_matched, confidence = match_result
|
||||
logging.info("Result found on %s", self.label)
|
||||
time_end: float = time.time()
|
||||
time_diff: float = time_end - time_start
|
||||
matched = LyricsResult(
|
||||
artist=returned_artist,
|
||||
song=returned_song,
|
||||
src=self.label,
|
||||
lyrics=returned_lyrics if plain else lrc_obj, # type: ignore
|
||||
confidence=confidence,
|
||||
time=time_diff,
|
||||
.join(Lyrics, Tracks.id == Lyrics.track_id)
|
||||
.filter(
|
||||
Tracks.artist_name_lower == artist,
|
||||
Tracks.name_lower == song,
|
||||
)
|
||||
await self.redis_cache.increment_found_count(self.label)
|
||||
if plain:
|
||||
await self.cache.store(matched)
|
||||
return matched
|
||||
.limit(1)
|
||||
)
|
||||
best_match = result.first()
|
||||
|
||||
# If no exact match, try prefix match (faster than full ILIKE)
|
||||
if not best_match:
|
||||
result = await db.execute(
|
||||
select(
|
||||
Tracks.artist_name,
|
||||
Tracks.name,
|
||||
Lyrics.plain_lyrics,
|
||||
Lyrics.synced_lyrics,
|
||||
)
|
||||
.join(Lyrics, Tracks.id == Lyrics.track_id)
|
||||
.filter(
|
||||
Tracks.artist_name_lower.like(f"{artist}%"),
|
||||
Tracks.name_lower.like(f"{song}%"),
|
||||
)
|
||||
.limit(1)
|
||||
)
|
||||
best_match = result.first()
|
||||
|
||||
# If still no match, try full ILIKE (slowest)
|
||||
if not best_match:
|
||||
result = await db.execute(
|
||||
select(
|
||||
Tracks.artist_name,
|
||||
Tracks.name,
|
||||
Lyrics.plain_lyrics,
|
||||
Lyrics.synced_lyrics,
|
||||
)
|
||||
.join(Lyrics, Tracks.id == Lyrics.track_id)
|
||||
.filter(
|
||||
Tracks.artist_name_lower.ilike(f"%{artist}%"),
|
||||
Tracks.name_lower.ilike(f"%{song}%"),
|
||||
)
|
||||
.limit(1)
|
||||
)
|
||||
best_match = result.first()
|
||||
|
||||
if not best_match:
|
||||
logging.info("No result found on %s", self.label)
|
||||
return None
|
||||
|
||||
returned_artist = best_match.artist_name
|
||||
returned_song = best_match.name
|
||||
|
||||
if plain:
|
||||
if not best_match.plain_lyrics:
|
||||
logging.info("No plain lyrics available on %s", self.label)
|
||||
return None
|
||||
returned_lyrics = best_match.plain_lyrics
|
||||
returned_lyrics = self.datautils.scrub_lyrics(returned_lyrics)
|
||||
lrc_obj = None
|
||||
else:
|
||||
if not best_match.synced_lyrics:
|
||||
logging.info("No synced lyrics available on %s", self.label)
|
||||
return None
|
||||
returned_lyrics = best_match.synced_lyrics
|
||||
lrc_obj = self.datautils.create_lrc_object(returned_lyrics)
|
||||
|
||||
# Calculate match confidence
|
||||
input_track = f"{artist} - {song}"
|
||||
returned_track = f"{returned_artist} - {returned_song}"
|
||||
match_result = self.matcher.find_best_match(
|
||||
input_track=input_track,
|
||||
candidate_tracks=[(0, returned_track)]
|
||||
)
|
||||
|
||||
if not match_result:
|
||||
return None
|
||||
|
||||
_matched, confidence = match_result
|
||||
|
||||
logging.info("Result found on %s", self.label)
|
||||
time_end = time.time()
|
||||
time_diff = time_end - time_start
|
||||
|
||||
matched = LyricsResult(
|
||||
artist=returned_artist,
|
||||
song=returned_song,
|
||||
src=self.label,
|
||||
lyrics=returned_lyrics if plain else lrc_obj, # type: ignore
|
||||
confidence=confidence,
|
||||
time=time_diff,
|
||||
)
|
||||
|
||||
await self.redis_cache.increment_found_count(self.label)
|
||||
return matched
|
||||
|
||||
except Exception as e:
|
||||
logging.debug("Exception: %s", str(e))
|
||||
traceback.print_exc()
|
||||
logging.error("Exception in %s: %s", self.label, str(e))
|
||||
return None
|
||||
|
||||
Reference in New Issue
Block a user