progress -- change get to search for lrclib
This commit is contained in:
parent
9d8d38081a
commit
f86d5a4fec
@ -123,8 +123,14 @@ class LyricSearch(FastAPI):
|
|||||||
|
|
||||||
aggregate_search = aggregate.Aggregate()
|
aggregate_search = aggregate.Aggregate()
|
||||||
result = await aggregate_search.search(data.a, data.s)
|
result = await aggregate_search.search(data.a, data.s)
|
||||||
|
if not result:
|
||||||
|
return {
|
||||||
|
'err': True,
|
||||||
|
'errorText': 'Sources exhausted, lyrics not located.',
|
||||||
|
}
|
||||||
result = result.dict()
|
result = result.dict()
|
||||||
result['lyrics'] = regex.sub(r'(\s/\s|\n)', '<br>', result['lyrics']).strip()
|
result['lyrics'] = regex.sub(r'(\s/\s|\n)', '<br>', result['lyrics']).strip()
|
||||||
|
result['confidence'] = f'{float(result.get('confidence', 0)):.2f}'
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
@ -8,6 +8,7 @@ sys.path.insert(1,'..')
|
|||||||
sys.path.insert(1,'..')
|
sys.path.insert(1,'..')
|
||||||
from . import cache
|
from . import cache
|
||||||
from . import genius
|
from . import genius
|
||||||
|
from . import lrclib
|
||||||
|
|
||||||
class Aggregate:
|
class Aggregate:
|
||||||
"""Aggregate all source methods"""
|
"""Aggregate all source methods"""
|
||||||
@ -20,12 +21,18 @@ class Aggregate:
|
|||||||
async def search(self, artist: str, song: str) -> Optional[LyricsResult]:
|
async def search(self, artist: str, song: str) -> Optional[LyricsResult]:
|
||||||
cache_search = cache.Cache()
|
cache_search = cache.Cache()
|
||||||
genius_search = genius.Genius()
|
genius_search = genius.Genius()
|
||||||
|
lrclib_search = lrclib.LRCLib()
|
||||||
search = None
|
search = None
|
||||||
if "cache" not in self.exclude_methods:
|
if "cache" not in self.exclude_methods:
|
||||||
# First, try cache
|
# First, try cache
|
||||||
search = await cache_search.search(artist, song)
|
search = await cache_search.search(artist, song)
|
||||||
if not search:
|
if not search:
|
||||||
print("Cache: NOT FOUND!")
|
print("Cache: NOT FOUND!")
|
||||||
|
# Then try LRCLib
|
||||||
|
if "lrclib" not in self.exclude_methods:
|
||||||
|
search = await lrclib_search.search(artist, song)
|
||||||
|
if not search:
|
||||||
|
print("LRCLib: Not found!")
|
||||||
# Then try Genius
|
# Then try Genius
|
||||||
if "genius" in self.exclude_methods:
|
if "genius" in self.exclude_methods:
|
||||||
return # Skipped last possible source, return None
|
return # Skipped last possible source, return None
|
||||||
|
@ -48,7 +48,8 @@ class Cache:
|
|||||||
for ext in self.sqlite_exts:
|
for ext in self.sqlite_exts:
|
||||||
await db_conn.load_extension(ext)
|
await db_conn.load_extension(ext)
|
||||||
async with await db_conn.executescript(self.cache_pre_query) as _db_cursor:
|
async with await db_conn.executescript(self.cache_pre_query) as _db_cursor:
|
||||||
search_query = 'SELECT id, artist, song, lyrics, src, confidence FROM lyrics WHERE editdist3((artist || " " || song), (? || " " || ?))\
|
search_query = 'SELECT id, artist, song, lyrics, src, confidence FROM lyrics\
|
||||||
|
WHERE editdist3((artist || " " || song), (? || " " || ?))\
|
||||||
<= 410 ORDER BY editdist3((artist || " " || song), ?) ASC LIMIT 10'
|
<= 410 ORDER BY editdist3((artist || " " || song), ?) ASC LIMIT 10'
|
||||||
search_params = (artist.strip(), song.strip(),
|
search_params = (artist.strip(), song.strip(),
|
||||||
f"{artist.strip()} {song.strip()}")
|
f"{artist.strip()} {song.strip()}")
|
||||||
|
82
lyric_search_new/sources/lrclib.py
Normal file
82
lyric_search_new/sources/lrclib.py
Normal file
@ -0,0 +1,82 @@
|
|||||||
|
#!/usr/bin/env python3.12
|
||||||
|
# pylint: disable=bare-except, broad-exception-caught, wrong-import-position
|
||||||
|
|
||||||
|
import sys
|
||||||
|
sys.path.insert(1,'..')
|
||||||
|
import traceback
|
||||||
|
from aiohttp import ClientTimeout, ClientSession
|
||||||
|
from lyric_search_new import utils
|
||||||
|
from lyric_search_new.constructors import LyricsResult
|
||||||
|
from . import common
|
||||||
|
|
||||||
|
class InvalidResponseException(Exception):
    """Raised when a lyrics source returns a response that fails validation."""
|
||||||
|
|
||||||
|
class LRCLib:
    """LRCLib Search Module"""

    def __init__(self):
        self.label = "LRCLib"
        self.lrclib_url = "https://lrclib.net/api/get"
        self.headers = common.SCRAPE_HEADERS
        self.timeout = ClientTimeout(connect=2, sock_read=4)
        self.datautils = utils.DataUtils()
        self.matcher = utils.TrackMatcher()

    async def search(self, artist: str, song: str) -> "LyricsResult | None":
        """
        Query LRCLib for a single track and return its plain lyrics.

        @artist: the artist to search
        @song: the song to search

        Returns a LyricsResult on success, or None when the request fails,
        the response does not validate, or the returned track does not match
        the requested one closely enough. Errors are printed, never raised.
        """
        try:
            artist = artist.strip().lower()
            song = song.strip().lower()
            async with ClientSession() as client:
                async with client.get(self.lrclib_url,
                                      params={
                                          'artist_name': artist,
                                          'track_name': song,
                                      },
                                      timeout=self.timeout,
                                      headers=self.headers) as request:
                    request.raise_for_status()
                    text = await request.text()

                    if len(text) < 100:
                        raise InvalidResponseException("Search response text was invalid (len < 100 chars.)")

                    search_data = await request.json()

            if not isinstance(search_data, dict):
                raise InvalidResponseException("Invalid JSON.")

            # .get() so a missing key is reported as an invalid response
            # instead of surfacing as an unrelated KeyError.
            if not isinstance(search_data.get('artistName'), str):
                raise InvalidResponseException(f"Invalid JSON: Cannot find artistName key.\n{search_data}")

            if not isinstance(search_data.get('trackName'), str):
                raise InvalidResponseException(f"Invalid JSON: Cannot find trackName key.\n{search_data}")

            # plainLyrics may be absent or null; there is nothing to scrub then.
            if not isinstance(search_data.get('plainLyrics'), str):
                raise InvalidResponseException(f"Invalid JSON: Cannot find plainLyrics key.\n{search_data}")

            returned_artist = search_data['artistName']
            returned_song = search_data['trackName']
            returned_lyrics = self.datautils.scrub_lyrics(search_data['plainLyrics'])

            input_track = f"{artist} - {song}"
            # Compare against what LRCLib actually returned; comparing the
            # query with itself would always report a perfect match.
            returned_track = f"{returned_artist} - {returned_song}"
            match = self.matcher.find_best_match(input_track=input_track,
                                                 candidate_tracks=[(0, returned_track)])
            # find_best_match returns None below its threshold, so it cannot
            # be tuple-unpacked unconditionally.
            confidence = match[1] if match else None
            if not confidence:
                print(f"I have no confidence! {confidence}, input_track: {input_track}; candidates: {[returned_track]}")
                return None  # No suitable match found
            print("Returning!")
            return LyricsResult(artist=returned_artist,
                                song=returned_song,
                                src=self.label,
                                lyrics=returned_lyrics,
                                confidence=confidence)
        except Exception:
            # Best-effort source: report and fall through to the next source.
            # Exception (not a bare except) lets task cancellation and
            # KeyboardInterrupt propagate.
            traceback.print_exc()
            return None
|
||||||
|
|
||||||
|
|
@ -4,7 +4,7 @@
|
|||||||
import asyncio
|
import asyncio
|
||||||
import sys
|
import sys
|
||||||
sys.path.insert(1, '.')
|
sys.path.insert(1, '.')
|
||||||
import sources.cache, sources.genius, sources.aggregate
|
import sources.cache, sources.genius, sources.aggregate, sources.lrclib
|
||||||
|
|
||||||
test_artist = "hopsin"
|
test_artist = "hopsin"
|
||||||
test_song = "ill mind of hopsin 5"
|
test_song = "ill mind of hopsin 5"
|
||||||
@ -28,6 +28,14 @@ async def test_genius(artist=None, song=None):
|
|||||||
result = await genius.search(artist, song)
|
result = await genius.search(artist, song)
|
||||||
print(result)
|
print(result)
|
||||||
|
|
||||||
|
async def test_lrclib(artist=None, song=None):
    """
    Run one LRCLib lookup and print whatever it returns.

    Unless both artist and song are given, the module-level
    test_artist / test_song defaults are used for both.
    """
    if not (artist and song):
        artist, song = test_artist, test_song
    source = sources.lrclib.LRCLib()
    print(await source.search(artist, song))
|
||||||
|
|
||||||
async def test_aggregate(artist=None, song=None):
|
async def test_aggregate(artist=None, song=None):
|
||||||
if not artist or not song:
|
if not artist or not song:
|
||||||
artist = test_artist
|
artist = test_artist
|
||||||
@ -40,5 +48,6 @@ async def test_aggregate(artist=None, song=None):
|
|||||||
|
|
||||||
loop = asyncio.new_event_loop()
|
loop = asyncio.new_event_loop()
|
||||||
# loop.run_until_complete(test_genius())
|
# loop.run_until_complete(test_genius())
|
||||||
loop.run_until_complete(test_cache(artist=test_artist, song=test_song))
|
loop.run_until_complete(test_lrclib())
|
||||||
|
# loop.run_until_complete(test_cache(artist=test_artist, song=test_song))
|
||||||
# loop.run_until_complete(test_aggregate())
|
# loop.run_until_complete(test_aggregate())
|
||||||
|
@ -55,7 +55,7 @@ class TrackMatcher:
|
|||||||
best_match = candidate
|
best_match = candidate
|
||||||
|
|
||||||
# Return the match only if it meets the threshold
|
# Return the match only if it meets the threshold
|
||||||
return (best_match, best_score) if best_score >= self.threshold else None
|
return (best_match, round(best_score, 2)) if best_score >= self.threshold else None
|
||||||
|
|
||||||
def _normalize_string(self, text: str) -> str:
|
def _normalize_string(self, text: str) -> str:
|
||||||
"""
|
"""
|
||||||
|
Loading…
x
Reference in New Issue
Block a user