From f86d5a4fec18dcb2801846e7007614de25cf1e94 Mon Sep 17 00:00:00 2001 From: codey Date: Tue, 14 Jan 2025 10:04:05 -0500 Subject: [PATCH] progress -- change get to search for lrclib --- endpoints/lyric_search.py | 6 ++ lyric_search_new/sources/aggregate.py | 15 +++-- lyric_search_new/sources/cache.py | 3 +- lyric_search_new/sources/lrclib.py | 82 +++++++++++++++++++++++++++ lyric_search_new/tests.py | 13 ++++- lyric_search_new/utils.py | 2 +- 6 files changed, 113 insertions(+), 8 deletions(-) create mode 100644 lyric_search_new/sources/lrclib.py diff --git a/endpoints/lyric_search.py b/endpoints/lyric_search.py index e9d899b..cf7f644 100644 --- a/endpoints/lyric_search.py +++ b/endpoints/lyric_search.py @@ -123,8 +123,14 @@ class LyricSearch(FastAPI): aggregate_search = aggregate.Aggregate() result = await aggregate_search.search(data.a, data.s) + if not result: + return { + 'err': True, + 'errorText': 'Sources exhausted, lyrics not located.', + } result = result.dict() result['lyrics'] = regex.sub(r'(\s/\s|\n)', '
', result['lyrics']).strip() + result['confidence'] = f'{float(result.get('confidence', 0)):.2f}' return result diff --git a/lyric_search_new/sources/aggregate.py b/lyric_search_new/sources/aggregate.py index 591a9e6..76d0937 100644 --- a/lyric_search_new/sources/aggregate.py +++ b/lyric_search_new/sources/aggregate.py @@ -8,6 +8,7 @@ sys.path.insert(1,'..') sys.path.insert(1,'..') from . import cache from . import genius +from . import lrclib class Aggregate: """Aggregate all source methods""" @@ -20,15 +21,21 @@ class Aggregate: async def search(self, artist: str, song: str) -> Optional[LyricsResult]: cache_search = cache.Cache() genius_search = genius.Genius() + lrclib_search = lrclib.LRCLib() search = None if "cache" not in self.exclude_methods: # First, try cache search = await cache_search.search(artist, song) if not search: print("Cache: NOT FOUND!") - # Then try Genius - if "genius" in self.exclude_methods: - return # Skipped last possible source, return None - search = await genius_search.search(artist, song) + # Then try LRCLib + if "lrclib" not in self.exclude_methods: + search = await lrclib_search.search(artist, song) + if not search: + print("LRCLib: Not found!") + # Then try Genius + if "genius" in self.exclude_methods: + return # Skipped last possible source, return None + search = await genius_search.search(artist, song) return search diff --git a/lyric_search_new/sources/cache.py b/lyric_search_new/sources/cache.py index 0b19e6b..36c0ac8 100644 --- a/lyric_search_new/sources/cache.py +++ b/lyric_search_new/sources/cache.py @@ -48,7 +48,8 @@ class Cache: for ext in self.sqlite_exts: await db_conn.load_extension(ext) async with await db_conn.executescript(self.cache_pre_query) as _db_cursor: - search_query = 'SELECT id, artist, song, lyrics, src, confidence FROM lyrics WHERE editdist3((artist || " " || song), (? || " " || ?))\ + search_query = 'SELECT id, artist, song, lyrics, src, confidence FROM lyrics\ + WHERE editdist3((artist || " " || song), (? || " " || ?))\ <= 410 ORDER BY editdist3((artist || " " || song), ?) ASC LIMIT 10' search_params = (artist.strip(), song.strip(), f"{artist.strip()} {song.strip()}") diff --git a/lyric_search_new/sources/lrclib.py b/lyric_search_new/sources/lrclib.py new file mode 100644 index 0000000..7b0d26a --- /dev/null +++ b/lyric_search_new/sources/lrclib.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python3.12 +# pylint: disable=bare-except, broad-exception-caught, wrong-import-position + +import sys +sys.path.insert(1,'..') +import traceback +from aiohttp import ClientTimeout, ClientSession +from lyric_search_new import utils +from lyric_search_new.constructors import LyricsResult +from . import common + +class InvalidResponseException(Exception): + """ + Invalid Response Exception + """ + +class LRCLib: + """LRCLib Search Module""" + def __init__(self): + self.label = "LRCLib" + self.lrclib_url = "https://lrclib.net/api/get" + self.headers = common.SCRAPE_HEADERS + self.timeout = ClientTimeout(connect=2, sock_read=4) + self.datautils = utils.DataUtils() + self.matcher = utils.TrackMatcher() + + async def search(self, artist: str, song: str): + """ + @artist: the artist to search + @song: the song to search + """ + try: + artist = artist.strip().lower() + song = song.strip().lower() + returned_lyrics = '' + async with ClientSession() as client: + async with client.get(self.lrclib_url, + params = { + 'artist_name': artist, + 'track_name': song, + }, + timeout=self.timeout, + headers=self.headers) as request: + request.raise_for_status() + text = await request.text() + + if len(text) < 100: + raise InvalidResponseException("Search response text was invalid (len < 100 chars.)") + + search_data = await request.json() + + if not isinstance(search_data, dict): + raise InvalidResponseException("Invalid JSON.") + + if not isinstance(search_data['artistName'], str): + raise InvalidResponseException(f"Invalid JSON: Cannot find artistName key.\n{search_data}") + + if not isinstance(search_data['trackName'], str): + raise InvalidResponseException(f"Invalid JSON: Cannot find trackName key.\n{search_data}") + + returned_artist = search_data['artistName'] + returned_song = search_data['trackName'] + returned_lyrics = search_data['plainLyrics'] + returned_lyrics = self.datautils.scrub_lyrics(returned_lyrics) + input_track = f"{artist} - {song}" + returned_track = f"{artist} - {song}" + (matched, confidence) = self.matcher.find_best_match(input_track=input_track, + candidate_tracks=[(0, returned_track)]) + if not confidence: + print(f"I have no confidence! {confidence}, input_track: {input_track}; candidates: {[returned_track]}") + return # No suitable match found + print("Returning!") + return LyricsResult(artist=returned_artist, + song=returned_song, + src=self.label, + lyrics=returned_lyrics, + confidence=confidence) + except: + traceback.print_exc() + return + + diff --git a/lyric_search_new/tests.py b/lyric_search_new/tests.py index a697af5..6219d5d 100644 --- a/lyric_search_new/tests.py +++ b/lyric_search_new/tests.py @@ -4,7 +4,7 @@ import asyncio import sys sys.path.insert(1, '.') -import sources.cache, sources.genius, sources.aggregate +import sources.cache, sources.genius, sources.aggregate, sources.lrclib test_artist = "hopsin" test_song = "ill mind of hopsin 5" @@ -28,6 +28,14 @@ async def test_genius(artist=None, song=None): result = await genius.search(artist, song) print(result) +async def test_lrclib(artist=None, song=None): + if not artist or not song: + artist = test_artist + song = test_song + lrclib = sources.lrclib.LRCLib() + result = await lrclib.search(artist, song) + print(result) + async def test_aggregate(artist=None, song=None): if not artist or not song: artist = test_artist @@ -40,5 +48,6 @@ async def test_aggregate(artist=None, song=None): loop = asyncio.new_event_loop() # loop.run_until_complete(test_genius()) -loop.run_until_complete(test_cache(artist=test_artist, song=test_song)) +loop.run_until_complete(test_lrclib()) +# loop.run_until_complete(test_cache(artist=test_artist, song=test_song)) # loop.run_until_complete(test_aggregate()) diff --git a/lyric_search_new/utils.py b/lyric_search_new/utils.py index f49ee41..e2b0c59 100644 --- a/lyric_search_new/utils.py +++ b/lyric_search_new/utils.py @@ -55,7 +55,7 @@ class TrackMatcher: best_match = candidate # Return the match only if it meets the threshold - return (best_match, best_score) if best_score >= self.threshold else None + return (best_match, round(best_score, 2)) if best_score >= self.threshold else None def _normalize_string(self, text: str) -> str: """