311 lines
14 KiB
Python
Raw Normal View History

2025-01-13 20:47:39 -05:00
import os
2025-01-15 20:17:49 -05:00
import time
import regex
2025-01-14 11:10:13 -05:00
import logging
2025-01-13 20:47:39 -05:00
import sys
2025-01-14 11:10:13 -05:00
import traceback
2025-01-13 20:47:39 -05:00
sys.path.insert(1,'..')
2025-01-14 07:45:34 -05:00
sys.path.insert(1,'.')
2025-02-15 21:09:33 -05:00
from typing import Optional, Union, LiteralString
2025-01-14 07:45:34 -05:00
import aiosqlite as sqlite3
2025-01-18 13:26:00 -05:00
from . import redis_cache
from lyric_search import utils, notifier
from lyric_search.constructors import LyricsResult
2025-01-13 20:47:39 -05:00
2025-01-14 11:10:13 -05:00
# Root logger (no name given) and the textual name of its configured level,
# captured once at import time.
logger = logging.getLogger()
log_level = logging.getLevelName(logger.level)
2025-01-12 20:19:48 -05:00
class Cache:
    """
    Cache Search Module

    Looks lyrics up in, and stores lyrics to, a two-tier cache: Redis is
    consulted first and a SQLite database is used as the fallback.
    """

    # Minimum match confidence (in %) required before a cached result is returned.
    MIN_CONFIDENCE: int = 90

    # Fuzzy-match clause shared by the existence check and the search query.
    # editdist3() is provided by the spellfix1 extension listed in
    # ``sqlite_exts``; 410 is the largest edit distance still accepted.
    # Placeholders: artist, song, "artist song".
    _FUZZY_MATCH: str = (
        "WHERE editdist3((lower(artist) || ' ' || lower(song)), (? || ' ' || ?)) <= 410 "
        "ORDER BY editdist3((lower(artist) || ' ' || lower(song)), ?) ASC"
    )

    def __init__(self) -> None:
        """Set up the SQLite DB path, the Redis cache client and the notifier."""
        self.cache_db: str = os.path.join("/", "usr", "local", "share",
                                          "sqlite_dbs", "cached_lyrics.db")
        self.redis_cache = redis_cache.RedisCache()
        self.notifier = notifier.DiscordNotifier()

        # PRAGMAs run on a connection before lookups and inserts.
        self.cache_pre_query: str = (
            "pragma journal_mode = WAL; pragma synchronous = normal; "
            "pragma temp_store = memory; pragma mmap_size = 30000000000;"
        )
        self.sqlite_exts: list[str] = ['/home/api/api/solibs/spellfix1.cpython-311-x86_64-linux-gnu.so']
        self.label: str = "Cache"

    async def _prepare_connection(self, db_conn: "sqlite3.Connection",
                                  load_extensions: bool = False) -> None:
        """
        Apply the pre-query PRAGMAs to a connection, optionally loading extensions first
        Args:
            db_conn (sqlite3.Connection): an open connection
            load_extensions (bool): load every extension in self.sqlite_exts (needed for editdist3)
        Returns: None
        """
        if load_extensions:
            await db_conn.enable_load_extension(True)
            for ext in self.sqlite_exts:
                await db_conn.load_extension(ext)
        async with await db_conn.executescript(self.cache_pre_query):
            pass

    def _best_match(self, matcher, input_track: str, result_tracks: list,
                    random_search: bool) -> Optional[tuple]:
        """
        Pick the best candidate from a list of (id, "artist - song") tuples
        Args:
            matcher: object exposing find_best_match(input_track=..., candidate_tracks=...)
            input_track (str): the searched track in "artist - song" format
            result_tracks (list): candidate (id, "artist - song") tuples
            random_search (bool): if True, the first candidate is taken at 100% confidence
        Returns:
            Optional[tuple]: (candidate, confidence), or None if there is nothing to match
        """
        if not result_tracks:
            return None
        if random_search:
            return (result_tracks[0], 100)
        return matcher.find_best_match(input_track=input_track,
                                       candidate_tracks=result_tracks)

    def get_matched(self, matched_candidate: tuple, confidence: int,
                    sqlite_rows: "Optional[list[sqlite3.Row]]" = None,
                    redis_results: Optional[list] = None) -> "Optional[LyricsResult]":
        """
        Get Matched Result
        Args:
            matched_candidate (tuple): the correctly matched candidate returned by matcher.best_match
            confidence (int): % confidence
            sqlite_rows (Optional[list[sqlite3.Row]]): List of returned rows from SQLite DB, or None if Redis
            redis_results (Optional[list]): List of (key, row) pairs returned by Redis, or None if SQLite
        Returns:
            Optional[LyricsResult]: The result, if found - None otherwise.
        """
        matched_id: int = matched_candidate[0]
        if redis_results:
            for (key, row) in redis_results:
                if key == matched_id:
                    # The confidence stored with the Redis entry is used here;
                    # search() overwrites it with the freshly computed one.
                    return LyricsResult(
                        artist=row['artist'],
                        song=row['song'],
                        lyrics=row['lyrics'],
                        src=f"{row['src']} (redis cache, id: {key})",
                        confidence=row['confidence'],
                    )
            return None

        # sqlite_rows may legitimately be None/empty: treat that as "no match".
        for row in sqlite_rows or []:
            if row[0] == matched_id:
                # The last column (stored confidence) is dropped in favour of
                # the confidence passed in by the caller.
                (_id, artist, song, lyrics, original_src) = row[:-1]
                return LyricsResult(
                    artist=artist,
                    song=song,
                    lyrics=lyrics,
                    src=f"{original_src} (cached, id: {_id})",
                    confidence=confidence,
                )
        return None

    async def check_existence(self, artistsong: str) -> Optional[bool]:
        """
        Check whether lyrics are already stored for track
        Args:
            artistsong (str): artist and song in artist\\nsong format
        Returns:
            bool: Whether track was found in cache
        """
        display_name: str = artistsong.replace("\n", " - ")
        logging.debug("Checking whether %s is already stored", display_name)
        check_query: str = f"SELECT id, artist, song FROM lyrics {self._FUZZY_MATCH} LIMIT 1"
        # partition() tolerates input without a newline (song becomes "").
        (artist, _sep, song) = artistsong.lower().partition("\n")
        params: tuple = (artist, song, f"{artist} {song}")

        async with sqlite3.connect(self.cache_db, timeout=2) as db_conn:
            await self._prepare_connection(db_conn, load_extensions=True)
            async with await db_conn.execute(check_query, params) as db_cursor:
                result = await db_cursor.fetchone()

        if result:
            logging.debug("%s is already stored.", display_name)
            return True
        logging.debug("%s cleared to be stored.", display_name)
        return False

    async def store(self, lyr_result: "LyricsResult") -> None:
        """
        Store lyrics (SQLite, then Redis)
        Args:
            lyr_result (LyricsResult): the returned lyrics to cache
        Returns: None
        """
        try:
            sqlite_insert_id = await self.sqlite_store(lyr_result)
            # Only mirror to Redis when a row was actually inserted.
            if sqlite_insert_id:
                await self.redis_cache.redis_store(sqlite_insert_id, lyr_result)
        except Exception as e:
            traceback.print_exc()
            filename: str = os.path.basename(__file__)
            message: str = f"cache::store >> {e}"
            logging.error("ERROR @ %s: %s", filename, message)
            await self.notifier.send(f"ERROR @ {filename}", message)

    async def sqlite_rowcount(self, where: Optional[str] = None,
                              params: Optional[tuple] = None) -> int:
        """
        Get rowcount for cached_lyrics DB
        Args:
            where (Optional[str]): WHERE ext for query if needed
            params (Optional[tuple]): Parameters to query, if where is specified
        Returns:
            int: Number of rows found
        """
        # NOTE: `where` is interpolated into the SQL text, so it must come from
        # trusted code; pass user-supplied values through `params` only.
        query: str = f"SELECT count(id) AS rowcount FROM lyrics {where or ''}".strip()
        async with sqlite3.connect(self.cache_db, timeout=2) as db_conn:
            db_conn.row_factory = sqlite3.Row
            async with await db_conn.execute(query, params or ()) as db_cursor:
                result = await db_cursor.fetchone()
                return result['rowcount']

    async def sqlite_distinct(self, column: str) -> int:
        """
        Get count of distinct values for a column
        Args:
            column (str): The column to check
        Returns:
            int: Number of distinct values found
        """
        # NOTE: identifiers cannot be bound as parameters, so `column` is
        # interpolated into the SQL text and must come from trusted code.
        query: str = f"SELECT COUNT(DISTINCT {column}) as distinct_items FROM lyrics"
        async with sqlite3.connect(self.cache_db, timeout=2) as db_conn:
            db_conn.row_factory = sqlite3.Row
            async with await db_conn.execute(query) as db_cursor:
                result = await db_cursor.fetchone()
                return result['distinct_items']

    async def sqlite_lyrics_length(self) -> int:
        """
        Get total length of text stored for lyrics
        Args:
            None
        Returns:
            int: Total length of stored lyrics (0 if nothing is stored)
        """
        query: str = "SELECT SUM(LENGTH(lyrics)) as lyrics_len FROM lyrics"
        async with sqlite3.connect(self.cache_db, timeout=2) as db_conn:
            db_conn.row_factory = sqlite3.Row
            async with await db_conn.execute(query) as db_cursor:
                result = await db_cursor.fetchone()
                # SUM() yields NULL on an empty table; report that as 0.
                return result['lyrics_len'] or 0

    async def sqlite_store(self, lyr_result: "LyricsResult") -> Optional[int]:
        """
        Store lyrics to SQLite Cache
        Args:
            lyr_result (LyricsResult): the returned lyrics to cache
        Returns:
            Optional[int]: the inserted row id, or None if storage was skipped or failed
        """
        logging.info("Storing %s",
                     f"{lyr_result.artist} - {lyr_result.song}")

        # NOTE(review): this only matches a src of exactly "cache"; results built by
        # get_matched() carry a "(cached, id: ...)" suffix instead - confirm intent.
        if lyr_result.src.lower() == "cache":
            logging.info("Skipping cache storage - returned LyricsResult originated from cache")
            return None

        artistsong: str = f"{lyr_result.artist}\n{lyr_result.song}"
        display_name: str = artistsong.replace("\n", " - ")
        if await self.check_existence(artistsong):
            logging.info("Skipping cache storage - %s is already stored.",
                         display_name)
            return None

        try:
            # Flatten line breaks to " / " separators, then collapse whitespace runs.
            lyrics = regex.sub(r'(<br>|\n|\r\n)', ' / ', lyr_result.lyrics.strip())
            lyrics = regex.sub(r'\s{2,}', ' ', lyrics)

            insert_query: str = (
                "INSERT INTO lyrics (src, date_retrieved, artist, song, artistsong, confidence, lyrics) "
                "VALUES(?, ?, ?, ?, ?, ?, ?)"
            )
            params: tuple = (lyr_result.src, time.time(), lyr_result.artist,
                             lyr_result.song, artistsong, lyr_result.confidence, lyrics)

            async with sqlite3.connect(self.cache_db, timeout=2) as db_conn:
                await self._prepare_connection(db_conn)
                async with await db_conn.execute(insert_query, params) as _cursor:
                    await db_conn.commit()
                    logging.info("Stored %s to SQLite!", display_name)
                    return _cursor.lastrowid
        except Exception:
            # `Exception` (not a bare except) so task cancellation still propagates.
            logging.critical("Cache storage error!")
            traceback.print_exc()
            return None

    async def _search_redis(self, artist: str, song: str, input_track: str,
                            matcher, random_search: bool,
                            time_start: float) -> "Optional[LyricsResult]":
        """
        Look the track up in Redis; best-effort, any failure yields None
        Args:
            artist (str): normalized artist
            song (str): normalized song
            input_track (str): "artist - song", as handed to the matcher
            matcher: track matcher used to rank candidates
            random_search (bool): whether a random track was requested
            time_start (float): time.time() at which the search began
        Returns:
            Optional[LyricsResult]: a sufficiently confident match, None otherwise
        """
        logging.debug("Checking redis cache for %s...", input_track)
        try:
            redis_result = await self.redis_cache.search(artist=artist, song=song)
            if not redis_result:
                return None

            result_tracks: list = [(key, f"{track['artist']} - {track['song']}")
                                   for (key, track) in redis_result]
            best_match = self._best_match(matcher, input_track, result_tracks, random_search)
            if not best_match:
                return None

            (candidate, confidence) = best_match
            matched = self.get_matched(redis_results=redis_result,
                                       matched_candidate=candidate,
                                       confidence=confidence)
            if not matched or confidence < self.MIN_CONFIDENCE:
                return None

            matched.confidence = confidence
            matched.time = time.time() - time_start
            logging.info("Found %s on redis cache, skipping SQLite...", input_track)
            await self.redis_cache.increment_found_count(self.label)
            return matched
        except Exception as e:
            # Redis is an optimisation only: report the failure and let the
            # caller fall back to SQLite.
            logging.warning("Redis cache lookup failed for %s, falling back to SQLite: %s",
                            input_track, e)
            return None

    async def _search_sqlite(self, artist: str, song: str, input_track: str,
                             matcher, random_search: bool,
                             time_start: float) -> "Optional[LyricsResult]":
        """
        Look the track up in the SQLite cache
        Args:
            artist (str): normalized artist
            song (str): normalized song
            input_track (str): "artist - song", as handed to the matcher
            matcher: track matcher used to rank candidates
            random_search (bool): whether a random track was requested
            time_start (float): time.time() at which the search began
        Returns:
            Optional[LyricsResult]: a sufficiently confident match, None otherwise
        """
        columns: str = "id, artist, song, lyrics, src, confidence"
        if random_search:
            search_query: str = f"SELECT {columns} FROM lyrics ORDER BY RANDOM() LIMIT 1"
            search_params: tuple = ()
        else:
            search_query = f"SELECT {columns} FROM lyrics {self._FUZZY_MATCH} LIMIT 10"
            search_params = (artist, song, f"{artist} {song}")

        async with sqlite3.connect(self.cache_db, timeout=2) as db_conn:
            await self._prepare_connection(db_conn, load_extensions=True)
            async with await db_conn.execute(search_query, search_params) as db_cursor:
                results: list = list(await db_cursor.fetchall())

        result_tracks: list = [(row[0], f"{row[1]} - {row[2]}") for row in results]
        best_match = self._best_match(matcher, input_track, result_tracks, random_search)
        if not best_match:
            return None

        # Unpack before comparing: the threshold must be applied to *this*
        # match's confidence, not to a value left over from the Redis lookup.
        (candidate, confidence) = best_match
        if confidence < self.MIN_CONFIDENCE:
            return None

        matched = self.get_matched(sqlite_rows=results,
                                   matched_candidate=candidate,
                                   confidence=confidence)
        if matched is None:
            return None

        logging.info("Result found on %s", self.label)
        matched.time = time.time() - time_start
        await self.redis_cache.increment_found_count(self.label)
        return matched

    async def search(self, artist: str, song: str, **kwargs) -> "Optional[LyricsResult]":
        """
        Cache Search
        Args:
            artist: the artist to search
            song: the song to search
            **kwargs: accepted for interface compatibility; unused here
        Returns:
            Optional[LyricsResult]: The result, if found - None otherwise.
        """
        try:
            artist = artist.strip().lower()
            song = song.strip().lower()
            input_track: str = f"{artist} - {song}"
            # "!" for both artist and song requests a random cached track.
            random_search: bool = artist == "!" and song == "!"
            time_start: float = time.time()
            matcher = utils.TrackMatcher()

            logging.info("Searching %s - %s on %s",
                         artist, song, self.label)

            # Check Redis first
            matched = await self._search_redis(artist, song, input_track, matcher,
                                               random_search, time_start)
            if matched is not None:
                return matched

            # SQLite: fallback
            return await self._search_sqlite(artist, song, input_track, matcher,
                                             random_search, time_start)
        except Exception:
            # `Exception` (not a bare except) so task cancellation still propagates.
            traceback.print_exc()
            return None