radio_util: open tracks SQLite DB in readonly mode; black: reformat files

This commit is contained in:
2025-04-17 07:28:05 -04:00
parent 96add377df
commit 6c88c23a4d
25 changed files with 1913 additions and 1340 deletions

View File

@ -4,23 +4,26 @@ from lyric_search import notifier
import sys
import logging
import traceback
sys.path.insert(1,'..')
sys.path.insert(1, "..")
from . import cache, redis_cache, genius, lrclib
class Aggregate:
"""
Aggregate all source methods
"""
def __init__(self, exclude_methods=None) -> None:
    """
    Prepare the aggregate searcher
    Args:
        exclude_methods (Optional[list]): source labels to skip; search() compares
            each source's lower-cased label against this list. Any falsy value
            (None, empty list) is replaced by a new empty list.
    """
    self.exclude_methods: list = exclude_methods or []
    self.redis_cache = redis_cache.RedisCache()
    self.notifier = notifier.DiscordNotifier()
async def search(self, artist: str, song: str,
plain: Optional[bool] = True) -> Optional[LyricsResult]:
async def search(
self, artist: str, song: str, plain: Optional[bool] = True
) -> Optional[LyricsResult]:
"""
Aggregate Search
Args:
@ -41,37 +44,41 @@ class Aggregate:
cache_search,
lrclib_search,
genius_search,
]
]
if not plain:
sources = [lrclib_search] # Only LRCLib supported for synced lyrics
sources = [lrclib_search] # Only LRCLib supported for synced lyrics
search_result: Optional[LyricsResult] = None
for source in sources:
if source.label.lower() in self.exclude_methods:
if not plain:
logging.info("Exclude conditions rejected - source requested to exclude: %s, plain: %s",
source.label, plain)
logging.info(
"Exclude conditions rejected - source requested to exclude: %s, plain: %s",
source.label,
plain,
)
else:
if plain:
logging.info("Skipping source: %s, excluded.", source.label)
continue
search_result = await source.search(artist=artist, song=song,
plain=plain)
search_result = await source.search(artist=artist, song=song, plain=plain)
if search_result:
break
logging.info("%s: NOT FOUND!", source.label)
if not search_result:
logging.info("%s - %s: all sources exhausted, not found.",
artist, song)
if plain: # do not record LRC fails
try:
logging.info("%s - %s: all sources exhausted, not found.", artist, song)
if plain: # do not record LRC fails
try:
await self.redis_cache.increment_found_count("failed")
self.notifier.send("WARNING",
f"Could not find {artist} - {song} via queried sources.")
self.notifier.send(
"WARNING",
f"Could not find {artist} - {song} via queried sources.",
)
except Exception as e:
traceback.print_exc()
logging.info("Could not increment redis failed counter: %s",
str(e))
self.notifier.send(f"ERROR @ {__file__.rsplit("/", maxsplit=1)[-1]}",
f"Could not increment redis failed counter: {str(e)}")
return search_result
logging.info("Could not increment redis failed counter: %s", str(e))
self.notifier.send(
f"ERROR @ {__file__.rsplit("/", maxsplit=1)[-1]}",
f"Could not increment redis failed counter: {str(e)}",
)
return search_result

View File

@ -4,8 +4,9 @@ import regex
import logging
import sys
import traceback
sys.path.insert(1,'..')
sys.path.insert(1,'.')
sys.path.insert(1, "..")
sys.path.insert(1, ".")
from typing import Optional, Union, LiteralString
import aiosqlite as sqlite3
from . import redis_cache
@ -15,27 +16,38 @@ from lyric_search.constructors import LyricsResult
logger = logging.getLogger()
log_level = logging.getLevelName(logger.level)
class Cache:
"""Cache Search Module"""
def __init__(self) -> None:
    """Set the cache DB path, the PRAGMA preamble, the SQLite extensions and helper clients"""
    db_path_parts = ("/", "usr", "local", "share", "sqlite_dbs", "cached_lyrics.db")
    self.cache_db: Union[str, LiteralString] = os.path.join(*db_path_parts)
    self.redis_cache = redis_cache.RedisCache()
    self.notifier = notifier.DiscordNotifier()
    # Run with executescript() ahead of the cache queries.
    # NOTE(review): the original literal used a backslash line-continuation; any
    # indentation on its second line is not visible in this view - confirm the
    # joined text if the exact whitespace between statements matters.
    self.cache_pre_query: str = (
        "pragma journal_mode = WAL; pragma synchronous = normal;"
        "pragma temp_store = memory; pragma mmap_size = 30000000000;"
    )
    # Loadable extensions; the query methods load each one after enabling
    # extension loading on the connection.
    self.sqlite_exts: list[str] = [
        "/home/api/api/solibs/spellfix1.cpython-311-x86_64-linux-gnu.so"
    ]
    self.label: str = "Cache"
def get_matched(self, matched_candidate: tuple, confidence: int,
sqlite_rows: Optional[list[sqlite3.Row]] = None,
redis_results: Optional[list] = None) -> Optional[LyricsResult]:
def get_matched(
self,
matched_candidate: tuple,
confidence: int,
sqlite_rows: Optional[list[sqlite3.Row]] = None,
redis_results: Optional[list] = None,
) -> Optional[LyricsResult]:
"""
Get Matched Result
Args:
matched_candidate (tuple): the correctly matched candidate returned by matcher.best_match
confidence (int): % confidence
confidence (int): % confidence
sqlite_rows (Optional[list[sqlite3.Row]]): List of returned rows from SQLite DB, or None if Redis
redis_results (Any): List of Redis returned data, or None if SQLite
Returns:
@ -47,11 +59,11 @@ class Cache:
(key, row) = res
if key == matched_id:
return LyricsResult(
artist=row['artist'],
song=row['song'],
lyrics=row['lyrics'],
artist=row["artist"],
song=row["song"],
lyrics=row["lyrics"],
src=f"{row['src']} (redis cache, id: {key})",
confidence=row['confidence']
confidence=row["confidence"],
)
else:
for row in sqlite_rows:
@ -62,9 +74,10 @@ class Cache:
song=song,
lyrics=lyrics,
src=f"{original_src} (cached, id: {_id})",
confidence=confidence)
confidence=confidence,
)
return None
async def check_existence(self, artistsong: str) -> Optional[bool]:
"""
Check whether lyrics are already stored for track
@ -73,10 +86,13 @@ class Cache:
Returns:
bool: Whether track was found in cache
"""
logging.debug("Checking whether %s is already stored",
artistsong.replace("\n", " - "))
check_query: str = 'SELECT id, artist, song FROM lyrics WHERE editdist3((lower(artist) || " " || lower(song)), (? || " " || ?))\
logging.debug(
"Checking whether %s is already stored", artistsong.replace("\n", " - ")
)
check_query: str = (
'SELECT id, artist, song FROM lyrics WHERE editdist3((lower(artist) || " " || lower(song)), (? || " " || ?))\
<= 410 ORDER BY editdist3((lower(artist) || " " || lower(song)), ?) ASC LIMIT 1'
)
artistsong_split = artistsong.split("\n", maxsplit=1)
artist = artistsong_split[0].lower()
song = artistsong_split[1].lower()
@ -84,39 +100,45 @@ class Cache:
async with sqlite3.connect(self.cache_db, timeout=2) as db_conn:
await db_conn.enable_load_extension(True)
for ext in self.sqlite_exts:
await db_conn.load_extension(ext)
async with await db_conn.executescript(self.cache_pre_query) as _db_cursor:
await db_conn.load_extension(ext)
async with await db_conn.executescript(self.cache_pre_query) as _db_cursor:
async with await db_conn.execute(check_query, params) as db_cursor:
result = await db_cursor.fetchone()
if result:
logging.debug("%s is already stored.",
artistsong.replace("\n", " - "))
logging.debug(
"%s is already stored.", artistsong.replace("\n", " - ")
)
return True
logging.debug("%s cleared to be stored.",
artistsong)
logging.debug("%s cleared to be stored.", artistsong)
return False
async def store(self, lyr_result: LyricsResult) -> None:
    """
    Persist a lyrics result (SQLite first, then Redis)
    Args:
        lyr_result (LyricsResult): the returned lyrics to cache
    Returns: None
    """
    try:
        row_id = await self.sqlite_store(lyr_result)
        if not row_id:
            # sqlite_store gave back no insert id, so nothing is written to Redis.
            return
        await self.redis_cache.redis_store(row_id, lyr_result)
    except Exception as e:
        traceback.print_exc()
        # Bare file name of this module, used to tag both the log line and the notification.
        this_file = __file__.rsplit("/", maxsplit=1)[-1]
        failure = str(e)
        logging.error("ERROR @ %s: %s", this_file, f"cache::store >> {failure}")
        await self.notifier.send(
            f"ERROR @ {this_file}",
            f"cache::store >> `{failure}`",
        )
async def sqlite_rowcount(
self, where: Optional[str] = None, params: Optional[tuple] = None
) -> int:
"""
Get rowcount for cached_lyrics DB
Args:
@ -130,8 +152,8 @@ class Cache:
query = f"SELECT count(id) AS rowcount FROM lyrics {where}".strip()
async with await db_conn.execute(query, params) as db_cursor:
result = await db_cursor.fetchone()
return result['rowcount']
return result["rowcount"]
async def sqlite_distinct(self, column: str) -> int:
"""
Get count of distinct values for a column
@ -145,8 +167,8 @@ class Cache:
query = f"SELECT COUNT(DISTINCT {column}) as distinct_items FROM lyrics"
async with await db_conn.execute(query) as db_cursor:
result = await db_cursor.fetchone()
return result['distinct_items']
return result["distinct_items"]
async def sqlite_lyrics_length(self) -> int:
"""
Get total length of text stored for lyrics
@ -160,9 +182,8 @@ class Cache:
query = "SELECT SUM(LENGTH(lyrics)) as lyrics_len FROM lyrics"
async with await db_conn.execute(query) as db_cursor:
result = await db_cursor.fetchone()
return result['lyrics_len']
return result["lyrics_len"]
async def sqlite_store(self, lyr_result: LyricsResult) -> int:
"""
Store lyrics to SQLite Cache
@ -172,30 +193,42 @@ class Cache:
int: the inserted row id
"""
logging.info("Storing %s",
f"{lyr_result.artist} - {lyr_result.song}")
logging.info("Storing %s", f"{lyr_result.artist} - {lyr_result.song}")
if lyr_result.src.lower() == "cache":
logging.info("Skipping cache storage - returned LyricsResult originated from cache")
logging.info(
"Skipping cache storage - returned LyricsResult originated from cache"
)
return
artistsong = f"{lyr_result.artist}\n{lyr_result.song}"
if await self.check_existence(artistsong):
logging.info("Skipping cache storage - %s is already stored.",
artistsong.replace("\n", " - "))
logging.info(
"Skipping cache storage - %s is already stored.",
artistsong.replace("\n", " - "),
)
return
try:
lyrics = regex.sub(r'(<br>|\n|\r\n)', ' / ', lyr_result.lyrics.strip())
lyrics = regex.sub(r'\s{2,}', ' ', lyrics)
lyrics = regex.sub(r"(<br>|\n|\r\n)", " / ", lyr_result.lyrics.strip())
lyrics = regex.sub(r"\s{2,}", " ", lyrics)
insert_query = "INSERT INTO lyrics (src, date_retrieved, artist, song, artistsong, confidence, lyrics)\
VALUES(?, ?, ?, ?, ?, ?, ?)"
params = (lyr_result.src, time.time(), lyr_result.artist,
lyr_result.song, artistsong, lyr_result.confidence, lyrics)
params = (
lyr_result.src,
time.time(),
lyr_result.artist,
lyr_result.song,
artistsong,
lyr_result.confidence,
lyrics,
)
async with sqlite3.connect(self.cache_db, timeout=2) as db_conn:
async with await db_conn.executescript(self.cache_pre_query) as _db_cursor:
async with await db_conn.executescript(
self.cache_pre_query
) as _db_cursor:
async with await db_conn.execute(insert_query, params) as _cursor:
await db_conn.commit()
logging.info("Stored %s to SQLite!", artistsong.replace("\n", " - "))
@ -203,7 +236,7 @@ class Cache:
except:
logging.critical("Cache storage error!")
traceback.print_exc()
async def search(self, artist: str, song: str, **kwargs) -> Optional[LyricsResult]:
"""
Cache Search
@ -214,8 +247,8 @@ class Cache:
Optional[LyricsResult]: The result, if found - None otherwise.
"""
try:
artist: str = artist.strip().lower()
song: str = song.strip().lower()
artist: str = artist.strip().lower()
song: str = song.strip().lower()
input_track: str = f"{artist} - {song}"
search_query = None
search_params: Optional[tuple] = None
@ -225,87 +258,105 @@ class Cache:
if artist == "!" and song == "!":
random_search = True
search_query: str = 'SELECT id, artist, song, lyrics, src, confidence\
FROM lyrics ORDER BY RANDOM() LIMIT 1'
search_query: str = (
"SELECT id, artist, song, lyrics, src, confidence\
FROM lyrics ORDER BY RANDOM() LIMIT 1"
)
logging.info("Searching %s - %s on %s", artist, song, self.label)
logging.info("Searching %s - %s on %s",
artist, song, self.label)
"""Check Redis First"""
logging.debug("Checking redis cache for %s...",
f"{artist} - {song}")
logging.debug("Checking redis cache for %s...", f"{artist} - {song}")
try:
redis_result = await self.redis_cache.search(artist=artist,
song=song)
redis_result = await self.redis_cache.search(artist=artist, song=song)
if redis_result:
result_tracks: list = []
for returned in redis_result:
(key, track) = returned
result_tracks.append((key, f"{track['artist']} - {track['song']}"))
result_tracks.append(
(key, f"{track['artist']} - {track['song']}")
)
if not random_search:
best_match: Optional[tuple] = matcher.find_best_match(input_track=input_track,
candidate_tracks=result_tracks)
best_match: Optional[tuple] = matcher.find_best_match(
input_track=input_track, candidate_tracks=result_tracks
)
else:
best_match = (result_tracks[0], 100)
if best_match:
(candidate, confidence) = best_match
matched = self.get_matched(redis_results=redis_result, matched_candidate=candidate,
confidence=confidence)
matched = self.get_matched(
redis_results=redis_result,
matched_candidate=candidate,
confidence=confidence,
)
if matched and confidence >= 90:
time_end: float = time.time()
time_diff: float = time_end - time_start
matched.confidence = confidence
matched.time = time_diff
logging.info("Found %s on redis cache, skipping SQLite...",
f"{artist} - {song}")
await self.redis_cache.increment_found_count(self.label)
logging.info(
"Found %s on redis cache, skipping SQLite...",
f"{artist} - {song}",
)
await self.redis_cache.increment_found_count(self.label)
return matched
except:
pass
"""SQLite: Fallback"""
async with sqlite3.connect(self.cache_db, timeout=2) as db_conn:
await db_conn.enable_load_extension(True)
for ext in self.sqlite_exts:
await db_conn.load_extension(ext)
async with await db_conn.executescript(self.cache_pre_query) as _db_cursor:
async with await db_conn.executescript(
self.cache_pre_query
) as _db_cursor:
if not random_search:
search_query: str = 'SELECT id, artist, song, lyrics, src, confidence FROM lyrics\
search_query: str = (
'SELECT id, artist, song, lyrics, src, confidence FROM lyrics\
WHERE editdist3((lower(artist) || " " || lower(song)), (? || " " || ?))\
<= 410 ORDER BY editdist3((lower(artist) || " " || lower(song)), ?) ASC LIMIT 10'
search_params: tuple = (artist.strip(), song.strip(),
f"{artist.strip()} {song.strip()}")
async with await _db_cursor.execute(search_query, search_params) as db_cursor:
)
search_params: tuple = (
artist.strip(),
song.strip(),
f"{artist.strip()} {song.strip()}",
)
async with await _db_cursor.execute(
search_query, search_params
) as db_cursor:
results: list = await db_cursor.fetchall()
result_tracks: list = []
for track in results:
(_id, _artist, _song, _lyrics, _src, _confidence) = track
result_tracks.append((_id, f"{_artist} - {_song}"))
if not random_search:
best_match: Optional[tuple] = matcher.find_best_match(input_track=input_track,
candidate_tracks=result_tracks)
best_match: Optional[tuple] = matcher.find_best_match(
input_track=input_track, candidate_tracks=result_tracks
)
else:
best_match = (result_tracks[0], 100)
if not best_match or confidence < 90:
return None
(candidate, confidence) = best_match
logging.info("Result found on %s", self.label)
matched = self.get_matched(sqlite_rows=results,
matched_candidate=candidate,
confidence=confidence)
matched = self.get_matched(
sqlite_rows=results,
matched_candidate=candidate,
confidence=confidence,
)
time_end: float = time.time()
time_diff: float = time_end - time_start
matched.time = time_diff
await self.redis_cache.increment_found_count(self.label)
return matched
except:
traceback.print_exc()
traceback.print_exc()

View File

@ -1,4 +1,4 @@
# Shared HTTP request headers for the scraping/search modules; the User-Agent
# presents a desktop Firefox browser.
SCRAPE_HEADERS: dict[str, str] = {
    "accept": "*/*",
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:130.0) Gecko/20100101 Firefox/130.0",
}

View File

@ -1,29 +1,31 @@
import sys
sys.path.insert(1,'..')
sys.path.insert(1, "..")
import traceback
import logging
import time
import re
from typing import Optional
from aiohttp import ClientTimeout, ClientSession
from bs4 import BeautifulSoup, ResultSet # type: ignore
from bs4 import BeautifulSoup, ResultSet # type: ignore
import html as htm
from . import private, common, cache, redis_cache
from lyric_search import utils
from lyric_search.constructors import (
LyricsResult, InvalidGeniusResponseException)
from lyric_search.constructors import LyricsResult, InvalidGeniusResponseException
logger = logging.getLogger()
log_level = logging.getLevelName(logger.level)
class Genius:
"""
Genius Search Module
"""
def __init__(self) -> None:
self.label: str = "Genius"
self.genius_url: str = private.GENIUS_URL
self.genius_search_url: str = f'{self.genius_url}api/search/song?q='
self.genius_search_url: str = f"{self.genius_url}api/search/song?q="
self.headers: dict = common.SCRAPE_HEADERS
self.timeout = ClientTimeout(connect=3, sock_read=5)
self.datautils = utils.DataUtils()
@ -31,8 +33,7 @@ class Genius:
self.cache = cache.Cache()
self.redis_cache = redis_cache.RedisCache()
async def search(self, artist: str, song: str,
**kwargs) -> Optional[LyricsResult]:
async def search(self, artist: str, song: str, **kwargs) -> Optional[LyricsResult]:
"""
Genius Search
Args:
@ -45,96 +46,125 @@ class Genius:
artist: str = artist.strip().lower()
song: str = song.strip().lower()
time_start: float = time.time()
logging.info("Searching %s - %s on %s",
artist, song, self.label)
search_term: str = f'{artist}%20{song}'
returned_lyrics: str = ''
logging.info("Searching %s - %s on %s", artist, song, self.label)
search_term: str = f"{artist}%20{song}"
returned_lyrics: str = ""
async with ClientSession() as client:
async with client.get(f'{self.genius_search_url}{search_term}',
timeout=self.timeout,
headers=self.headers) as request:
async with client.get(
f"{self.genius_search_url}{search_term}",
timeout=self.timeout,
headers=self.headers,
) as request:
request.raise_for_status()
text: Optional[str] = await request.text()
if not text:
raise InvalidGeniusResponseException("No search response.")
if len(text) < 100:
raise InvalidGeniusResponseException("Search response text was invalid (len < 100 chars.)")
raise InvalidGeniusResponseException(
"Search response text was invalid (len < 100 chars.)"
)
search_data = await request.json()
if not isinstance(search_data, dict):
raise InvalidGeniusResponseException("Invalid JSON.")
if not isinstance(search_data['response'], dict):
raise InvalidGeniusResponseException(f"Invalid JSON: Cannot find response key.\n{search_data}")
if not isinstance(search_data['response']['sections'], list):
raise InvalidGeniusResponseException(f"Invalid JSON: Cannot find response->sections key.\n{search_data}")
if not isinstance(search_data['response']['sections'][0]['hits'], list):
raise InvalidGeniusResponseException("Invalid JSON: Cannot find response->sections[0]->hits key.")
possible_matches: list = search_data['response']['sections'][0]['hits']
if not isinstance(search_data["response"], dict):
raise InvalidGeniusResponseException(
f"Invalid JSON: Cannot find response key.\n{search_data}"
)
if not isinstance(search_data["response"]["sections"], list):
raise InvalidGeniusResponseException(
f"Invalid JSON: Cannot find response->sections key.\n{search_data}"
)
if not isinstance(
search_data["response"]["sections"][0]["hits"], list
):
raise InvalidGeniusResponseException(
"Invalid JSON: Cannot find response->sections[0]->hits key."
)
possible_matches: list = search_data["response"]["sections"][0][
"hits"
]
to_scrape: list[tuple] = [
(
returned['result']['path'],
f'{returned['result']['artist_names']} - {returned['result']['title']}',
) for returned in possible_matches
returned["result"]["path"],
f"{returned['result']['artist_names']} - {returned['result']['title']}",
)
for returned in possible_matches
]
searched: str = f"{artist} - {song}"
best_match: tuple = self.matcher.find_best_match(input_track=searched,
candidate_tracks=to_scrape)
best_match: tuple = self.matcher.find_best_match(
input_track=searched, candidate_tracks=to_scrape
)
((scrape_stub, track), confidence) = best_match
scrape_url: str = f'{self.genius_url}{scrape_stub[1:]}'
async with client.get(scrape_url,
timeout=self.timeout,
headers=self.headers) as scrape_request:
scrape_url: str = f"{self.genius_url}{scrape_stub[1:]}"
async with client.get(
scrape_url, timeout=self.timeout, headers=self.headers
) as scrape_request:
scrape_request.raise_for_status()
scrape_text: Optional[str] = await scrape_request.text()
if not scrape_text:
raise InvalidGeniusResponseException("No scrape response.")
if len(scrape_text) < 100:
raise InvalidGeniusResponseException("Scrape response was invalid (len < 100 chars.)")
html = BeautifulSoup(htm.unescape(scrape_text).replace('<br/>', '\n'), "html.parser")
header_tags_genius: Optional[ResultSet] = html.find_all(class_=re.compile(r'.*Header.*'))
raise InvalidGeniusResponseException(
"Scrape response was invalid (len < 100 chars.)"
)
html = BeautifulSoup(
htm.unescape(scrape_text).replace("<br/>", "\n"),
"html.parser",
)
header_tags_genius: Optional[ResultSet] = html.find_all(
class_=re.compile(r".*Header.*")
)
if header_tags_genius:
for tag in header_tags_genius:
tag.extract()
divs: Optional[ResultSet] = html.find_all("div", {"data-lyrics-container": "true"})
divs: Optional[ResultSet] = html.find_all(
"div", {"data-lyrics-container": "true"}
)
if not divs:
return
for div in divs:
header_tags: Optional[ResultSet] = div.find_all(['h1', 'h2', 'h3', 'h4', 'h5'])
header_tags: Optional[ResultSet] = div.find_all(
["h1", "h2", "h3", "h4", "h5"]
)
if header_tags:
for tag in header_tags:
tag.extract()
tag.extract()
returned_lyrics += div.get_text()
returned_lyrics: str = self.datautils.scrub_lyrics(returned_lyrics)
returned_lyrics: str = self.datautils.scrub_lyrics(
returned_lyrics
)
artist: str = track.split(" - ", maxsplit=1)[0]
song: str = track.split(" - ", maxsplit=1)[1]
logging.info("Result found on %s", self.label)
time_end: float = time.time()
time_diff: float = time_end - time_start
matched = LyricsResult(artist=artist,
song=song,
src=self.label,
lyrics=returned_lyrics,
confidence=confidence,
time=time_diff)
matched = LyricsResult(
artist=artist,
song=song,
src=self.label,
lyrics=returned_lyrics,
confidence=confidence,
time=time_diff,
)
await self.redis_cache.increment_found_count(self.label)
await self.cache.store(matched)
return matched
except:
traceback.print_exc()
traceback.print_exc()

View File

@ -1,6 +1,7 @@
import sys
import time
sys.path.insert(1,'..')
sys.path.insert(1, "..")
import traceback
import logging
from typing import Optional, Union
@ -13,20 +14,23 @@ from lyric_search.constructors import InvalidLRCLibResponseException
logger = logging.getLogger()
log_level = logging.getLevelName(logger.level)
class LRCLib:
"""LRCLib Search Module"""
def __init__(self) -> None:
    """Set the LRCLib endpoint, request settings and helper objects"""
    self.lrclib_url: str = "https://lrclib.net/api/search"
    self.label: str = "LRCLib"
    # Request settings handed to every client.get() call in search().
    self.timeout = ClientTimeout(connect=2, sock_read=4)
    self.headers: dict = common.SCRAPE_HEADERS
    # Helpers: lyric scrubbing / LRC parsing, track matching, and the two caches.
    self.datautils = utils.DataUtils()
    self.matcher = utils.TrackMatcher()
    self.cache = cache.Cache()
    self.redis_cache = redis_cache.RedisCache()
async def search(self, artist: str, song: str,
plain: Optional[bool] = True) -> Optional[LyricsResult]:
async def search(
self, artist: str, song: str, plain: Optional[bool] = True
) -> Optional[LyricsResult]:
"""
LRCLib Search
Args:
@ -35,92 +39,124 @@ class LRCLib:
Returns:
Optional[LyricsResult]: The result, if found - None otherwise.
"""
try:
try:
artist: str = artist.strip().lower()
song: str = song.strip().lower()
time_start: float = time.time()
lrc_obj: Optional[list[dict]] = None
logging.info("Searching %s - %s on %s",
artist, song, self.label)
input_track: str = f"{artist} - {song}"
returned_lyrics: str = ''
logging.info("Searching %s - %s on %s", artist, song, self.label)
input_track: str = f"{artist} - {song}"
returned_lyrics: str = ""
async with ClientSession() as client:
async with await client.get(self.lrclib_url,
params = {
'artist_name': artist,
'track_name': song,
},
timeout=self.timeout,
headers=self.headers) as request:
async with await client.get(
self.lrclib_url,
params={
"artist_name": artist,
"track_name": song,
},
timeout=self.timeout,
headers=self.headers,
) as request:
request.raise_for_status()
text: Optional[str] = await request.text()
if not text:
raise InvalidLRCLibResponseException("No search response.")
raise InvalidLRCLibResponseException("No search response.")
if len(text) < 100:
raise InvalidLRCLibResponseException("Search response text was invalid (len < 100 chars.)")
raise InvalidLRCLibResponseException(
"Search response text was invalid (len < 100 chars.)"
)
search_data: Optional[Union[list, dict]] = await request.json()
if not isinstance(search_data, list|dict):
if not isinstance(search_data, list | dict):
raise InvalidLRCLibResponseException("No JSON search data.")
# logging.info("Search Data:\n%s", search_data)
if not isinstance(search_data, list):
raise InvalidLRCLibResponseException("Invalid JSON.")
if plain:
possible_matches = [(x, f"{result.get('artistName')} - {result.get('trackName')}")
for x, result in enumerate(search_data)]
possible_matches = [
(
x,
f"{result.get('artistName')} - {result.get('trackName')}",
)
for x, result in enumerate(search_data)
]
else:
logging.info("Limiting possible matches to only those with non-null syncedLyrics")
possible_matches = [(x, f"{result.get('artistName')} - {result.get('trackName')}")
for x, result in enumerate(search_data) if isinstance(result['syncedLyrics'], str)]
logging.info(
"Limiting possible matches to only those with non-null syncedLyrics"
)
possible_matches = [
(
x,
f"{result.get('artistName')} - {result.get('trackName')}",
)
for x, result in enumerate(search_data)
if isinstance(result["syncedLyrics"], str)
]
best_match = self.matcher.find_best_match(input_track,
possible_matches)[0]
best_match = self.matcher.find_best_match(
input_track, possible_matches
)[0]
if not best_match:
return
best_match_id = best_match[0]
if not isinstance(search_data[best_match_id]['artistName'], str):
raise InvalidLRCLibResponseException(f"Invalid JSON: Cannot find artistName key.\n{search_data}")
if not isinstance(search_data[best_match_id]['trackName'], str):
raise InvalidLRCLibResponseException(f"Invalid JSON: Cannot find trackName key.\n{search_data}")
returned_artist: str = search_data[best_match_id]['artistName']
returned_song: str = search_data[best_match_id]['trackName']
if not isinstance(search_data[best_match_id]["artistName"], str):
raise InvalidLRCLibResponseException(
f"Invalid JSON: Cannot find artistName key.\n{search_data}"
)
if not isinstance(search_data[best_match_id]["trackName"], str):
raise InvalidLRCLibResponseException(
f"Invalid JSON: Cannot find trackName key.\n{search_data}"
)
returned_artist: str = search_data[best_match_id]["artistName"]
returned_song: str = search_data[best_match_id]["trackName"]
if plain:
if not isinstance(search_data[best_match_id]['plainLyrics'], str):
raise InvalidLRCLibResponseException(f"Invalid JSON: Cannot find plainLyrics key.\n{search_data}")
returned_lyrics: str = search_data[best_match_id]['plainLyrics']
if not isinstance(
search_data[best_match_id]["plainLyrics"], str
):
raise InvalidLRCLibResponseException(
f"Invalid JSON: Cannot find plainLyrics key.\n{search_data}"
)
returned_lyrics: str = search_data[best_match_id]["plainLyrics"]
returned_lyrics = self.datautils.scrub_lyrics(returned_lyrics)
else:
if not isinstance(search_data[best_match_id]['syncedLyrics'], str):
raise InvalidLRCLibResponseException(f"Invalid JSON: Cannot find syncedLyrics key.\n{search_data}")
returned_lyrics: str = search_data[best_match_id]['syncedLyrics']
if not isinstance(
search_data[best_match_id]["syncedLyrics"], str
):
raise InvalidLRCLibResponseException(
f"Invalid JSON: Cannot find syncedLyrics key.\n{search_data}"
)
returned_lyrics: str = search_data[best_match_id][
"syncedLyrics"
]
lrc_obj = self.datautils.create_lrc_object(returned_lyrics)
returned_track: str = f"{returned_artist} - {returned_song}"
(_matched, confidence) = self.matcher.find_best_match(input_track=input_track,
candidate_tracks=[(0, returned_track)])
(_matched, confidence) = self.matcher.find_best_match(
input_track=input_track, candidate_tracks=[(0, returned_track)]
)
if not confidence:
return # No suitable match found
return # No suitable match found
logging.info("Result found on %s", self.label)
time_end: float = time.time()
time_diff: float = time_end - time_start
matched = LyricsResult(artist=returned_artist,
song=returned_song,
src=self.label,
lyrics=returned_lyrics if plain else lrc_obj,
confidence=confidence,
time=time_diff)
matched = LyricsResult(
artist=returned_artist,
song=returned_song,
src=self.label,
lyrics=returned_lyrics if plain else lrc_obj,
confidence=confidence,
time=time_diff,
)
await self.redis_cache.increment_found_count(self.label)
await self.cache.store(matched)
return matched
except:
traceback.print_exc()
traceback.print_exc()

View File

@ -7,24 +7,27 @@ import regex
from regex import Pattern
import asyncio
from typing import Union, Optional
sys.path.insert(1,'..')
sys.path.insert(1, "..")
from lyric_search import notifier
from lyric_search.constructors import LyricsResult
import redis.asyncio as redis
from redis.commands.search.query import Query # type: ignore
from redis.commands.search.indexDefinition import IndexDefinition, IndexType # type: ignore
from redis.commands.search.field import TextField, TagField # type: ignore
from redis.commands.json.path import Path # type: ignore
from redis.commands.search.query import Query # type: ignore
from redis.commands.search.indexDefinition import IndexDefinition, IndexType # type: ignore
from redis.commands.search.field import TextField, TagField # type: ignore
from redis.commands.json.path import Path # type: ignore
from . import private
logger = logging.getLogger()
log_level = logging.getLevelName(logger.level)
class RedisException(Exception):
    """Error raised by the Redis cache helpers in this module"""
class RedisCache:
"""
Redis Cache Methods
@ -35,34 +38,37 @@ class RedisCache:
self.notifier = notifier.DiscordNotifier()
self.notify_warnings = False
self.regexes: list[Pattern] = [
regex.compile(r'\-'),
regex.compile(r'[^a-zA-Z0-9\s]'),
regex.compile(r"\-"),
regex.compile(r"[^a-zA-Z0-9\s]"),
]
try:
asyncio.get_event_loop().create_task(self.create_index())
except Exception as e:
logging.debug("Failed to create redis create_index task: %s",
str(e))
logging.debug("Failed to create redis create_index task: %s", str(e))
async def create_index(self) -> None:
    """
    Create the Redis search index over cached lyrics
    The index is defined on JSON documents whose keys start with "lyrics:" and
    exposes four text fields: artist, song, src and lyrics.
    Any failure is logged at debug level and swallowed, so this never raises;
    presumably that also covers the index already existing - confirm.
    Returns: None
    """
    try:
        schema = (
            TextField("$.search_artist", as_name="artist"),
            TextField("$.search_song", as_name="song"),
            TextField("$.src", as_name="src"),
            TextField("$.lyrics", as_name="lyrics"),
        )
        definition = IndexDefinition(prefix=["lyrics:"], index_type=IndexType.JSON)
        result = await self.redis_client.ft().create_index(
            schema, definition=definition
        )
        # The reply may arrive as bytes when the client does not decode
        # responses; str(b"OK") is "b'OK'", which would make a successful
        # create look like a failure, so normalise before comparing.
        if isinstance(result, (bytes, bytearray)):
            status = result.decode("utf-8", "replace")
        else:
            status = str(result)
        if status != "OK":
            raise RedisException(f"Redis: Failed to create index: {result}")
    except Exception as e:
        logging.debug("Failed to create redis index: %s", str(e))
def sanitize_input(
self, artist: str, song: str, fuzzy: Optional[bool] = False
) -> tuple[str, str]:
"""
Sanitize artist/song input (convert to redis matchable fuzzy query)
Args:
@ -77,10 +83,12 @@ class RedisCache:
song = self.regexes[0].sub("", song)
song = self.regexes[1].sub("", song).strip()
if fuzzy:
artist = " ".join([f"(%{artist_word}%)" for artist_word in artist.split(" ")])
artist = " ".join(
[f"(%{artist_word}%)" for artist_word in artist.split(" ")]
)
song = " ".join([f"(%{song_word}%)" for song_word in song.split(" ")])
return (artist, song)
async def increment_found_count(self, src: str) -> None:
"""
Increment the found count for a source
@ -94,13 +102,13 @@ class RedisCache:
await self.redis_client.incr(f"returned:{src}")
except Exception as e:
file: str = __file__.rsplit("/", maxsplit=1)[-1]
await self.notifier.send(f"ERROR @ {file}", str(e))
await self.notifier.send(f"ERROR @ {file}", str(e))
traceback.print_exc()
async def get_found_counts(self) -> Optional[dict]:
"""
Get found counts for all sources (and failed count)
Returns:
dict: In the form {'source': count, 'source2': count, ...}
"""
@ -109,18 +117,20 @@ class RedisCache:
counts: dict[str, int] = {}
for src in sources:
src_found_count = await self.redis_client.get(f"returned:{src}")
counts[src] = int(src_found_count) # Redis returns bytes
counts[src] = int(src_found_count) # Redis returns bytes
return counts
except Exception as e:
file: str = __file__.rsplit("/", maxsplit=1)[-1]
await self.notifier.send(f"ERROR @ {file}", str(e))
await self.notifier.send(f"ERROR @ {file}", str(e))
traceback.print_exc()
return None
async def search(
    self,
    artist: Optional[str] = None,
    song: Optional[str] = None,
    lyrics: Optional[str] = None,
) -> Optional[list[tuple]]:
    """
    Search Redis Cache
    Args:
        artist (Optional[str]): Artist to search for
        song (Optional[str]): Song to search for; passing "!" for both
            artist and song returns one random cached entry instead
        lyrics (Optional[str]): Lyrics to search for (not yet implemented;
            any truthy value makes the search fail)
    Returns:
        Optional[list[tuple]]: List of (id, document) tuples, where id is
            the part of the redis key after the first ":". The list may be
            empty when neither the exact nor the fuzzy query matched.
            None is returned when artist/song is missing, when a random
            entry is requested but no key exists, or when any error occurred.
    """

    def docs_to_results(docs) -> list[tuple]:
        """Convert search hits to (id, parsed JSON document) tuples."""
        return [
            (
                doc["id"].split(":", maxsplit=1)[1],
                dict(json.loads(doc["json"])),
            )
            for doc in docs
        ]

    try:
        fuzzy_artist = None
        fuzzy_song = None
        is_random_search = artist == "!" and song == "!"

        if lyrics:
            # to code later
            raise RedisException("Lyric search not yet implemented")

        if not is_random_search:
            logging.debug("Redis: Searching normally first")
            if not artist or not song:
                logging.info(
                    "redis_cache:: search failed: No artist or song provided."
                )
                return None
            (artist, song) = self.sanitize_input(artist, song)
            logging.debug("Seeking: %s - %s", artist, song)
            search_res: Union[dict, list] = await self.redis_client.ft().search(
                Query(f"@artist:{artist} @song:{song}")  # type: ignore
            )
            search_res_out: list[tuple] = docs_to_results(
                search_res.docs  # type: ignore
            )
            if not search_res_out:
                logging.debug(
                    "Redis: Normal search failed, trying with fuzzy search"
                )
                # Only the first five words of each field go into the fuzzy query.
                short_artist = " ".join(artist.split(" ")[0:5])
                short_song = " ".join(song.split(" ")[0:5])
                (fuzzy_artist, fuzzy_song) = self.sanitize_input(
                    artist=short_artist.strip(), song=short_song.strip(), fuzzy=True
                )
                search_res = await self.redis_client.ft().search(
                    Query(  # type: ignore
                        f"@artist:{fuzzy_artist} @song:{fuzzy_song}"
                    )
                )
                search_res_out = docs_to_results(search_res.docs)  # type: ignore
        else:
            random_redis_key = await self.redis_client.randomkey()
            if random_redis_key is None:
                # No key available, so there is nothing to return.
                logging.debug("Redis: random search requested, but no keys exist")
                return None
            # Keys may arrive as bytes or str depending on the client's
            # response decoding; normalise instead of slicing the repr().
            if isinstance(random_redis_key, (bytes, bytearray)):
                key_text = bytes(random_redis_key).decode("utf-8")
            else:
                key_text = str(random_redis_key)
            out_id: str = key_text.split(":", maxsplit=1)[1]
            search_res = await self.redis_client.json().get(random_redis_key)
            search_res_out = [(out_id, search_res)]

        if not search_res_out and self.notify_warnings:
            await self.notifier.send(
                "WARNING", f"Redis cache miss for: `{artist} - {song}`"
            )
        return search_res_out
    except Exception as e:
        traceback.print_exc()
        # await self.notifier.send(f"ERROR @ {__file__.rsplit("/", maxsplit=1)[-1]}", f"{str(e)}\nSearch was: {artist} - {song}; fuzzy: {fuzzy_artist} - {fuzzy_song}")
        return None
async def redis_store(self, sqlite_id: int, lyr_result: LyricsResult) -> None:
    """
    Store lyrics to redis cache
    Args:
        sqlite_id (int): Row ID of the related SQLite entry; used to build
            the redis key ("lyrics:000<sqlite_id>")
        lyr_result (LyricsResult): Result whose artist, song, src,
            confidence and lyrics are stored
    Returns:
        None
    """
    try:
        # The sanitized artist/song are stored next to the display values.
        (search_artist, search_song) = self.sanitize_input(
            lyr_result.artist, lyr_result.song
        )
        redis_mapping: dict = {
            "id": sqlite_id,
            "src": lyr_result.src,
            "date_retrieved": time.time(),
            "artist": lyr_result.artist,
            "search_artist": search_artist,
            "search_song": search_song,
            "search_artistsong": f"{search_artist}\n{search_song}",
            "song": lyr_result.song,
            "artistsong": f"{lyr_result.artist}\n{lyr_result.song}",
            "confidence": lyr_result.confidence,
            "lyrics": lyr_result.lyrics,
            "tags": "(none)",
            "liked": 0,
        }
        newkey: str = f"lyrics:000{sqlite_id}"
        jsonset: bool = await self.redis_client.json().set(
            newkey, Path.root_path(), redis_mapping
        )
        if not jsonset:
            raise RedisException(
                f"Failed to store {lyr_result.artist} - {lyr_result.song} (SQLite id: {sqlite_id}) to redis:\n{jsonset}"
            )
        logging.info(
            "Stored %s - %s (related SQLite Row ID: %s) to %s",
            lyr_result.artist,
            lyr_result.song,
            sqlite_id,
            newkey,
        )
        await self.notifier.send(
            "INFO",
            f"Stored `{lyr_result.artist} - {lyr_result.song}` (related SQLite Row ID: `{sqlite_id}`) to redis: `{newkey}`",
        )
    except Exception as e:
        # Print the traceback first so the failure is recorded locally even
        # if sending the notification fails as well.
        traceback.print_exc()
        file: str = __file__.rsplit("/", maxsplit=1)[-1]
        # Adjacent f-strings instead of a backslash continuation inside the
        # literal, which leaked the continuation line's leading whitespace
        # into the message.
        await self.notifier.send(
            f"ERROR @ {file}",
            f"Failed to store `{lyr_result.artist} - {lyr_result.song}` "
            f"(SQLite id: `{sqlite_id}`) to Redis:\n`{str(e)}`",
        )