2025-01-12 20:19:48 -05:00
|
|
|
#!/usr/bin/env python3.12
|
2025-01-14 11:10:13 -05:00
|
|
|
# pylint: disable=wrong-import-order, wrong-import-position, bare-except, broad-exception-caught
|
2025-01-12 20:19:48 -05:00
|
|
|
|
2025-01-13 20:47:39 -05:00
|
|
|
import os
|
2025-01-15 20:17:49 -05:00
|
|
|
import time
|
|
|
|
import regex
|
2025-01-14 11:10:13 -05:00
|
|
|
import logging
|
2025-01-13 20:47:39 -05:00
|
|
|
import sys
|
2025-01-14 11:10:13 -05:00
|
|
|
import traceback
|
2025-01-13 20:47:39 -05:00
|
|
|
sys.path.insert(1,'..')
|
2025-01-14 07:45:34 -05:00
|
|
|
sys.path.insert(1,'.')
|
2025-01-18 13:26:00 -05:00
|
|
|
from typing import Optional, Any
|
2025-01-14 07:45:34 -05:00
|
|
|
import aiosqlite as sqlite3
|
2025-01-18 13:26:00 -05:00
|
|
|
from . import redis_cache
|
2025-01-13 20:47:39 -05:00
|
|
|
from lyric_search_new import utils
|
|
|
|
from lyric_search_new.constructors import LyricsResult
|
|
|
|
|
2025-01-18 13:26:00 -05:00
|
|
|
|
|
|
|
|
2025-01-14 11:10:13 -05:00
|
|
|
# Root logger; the host application is expected to configure handlers/levels.
logger = logging.getLogger()
# Human-readable name of the root logger's configured level (e.g. "INFO").
# NOTE(review): appears unused in this file — confirm it is read elsewhere.
log_level = logging.getLevelName(logger.level)
|
|
|
|
|
2025-01-12 20:19:48 -05:00
|
|
|
class Cache:
    """Cache Search Module

    Two-tier lyrics cache: Redis is consulted first, with SQLite
    (fuzzy-matched via the spellfix1 extension) as the fallback store.
    """
|
2025-01-19 07:09:05 -05:00
|
|
|
    def __init__(self) -> None:
        """Initialize cache DB path, Redis client, and SQLite tuning settings."""
        # Absolute path to the on-disk SQLite lyrics cache.
        self.cache_db: str = os.path.join("/", "var",
                                          "lib", "singerdbs",
                                          "cached_lyrics.db")
        # Redis cache layer, consulted before falling back to SQLite.
        self.redis_cache = redis_cache.RedisCache()
        # PRAGMA script executed on every connection: WAL journal, relaxed
        # sync, in-memory temp store, large mmap window.
        self.cache_pre_query: str = "pragma journal_mode = WAL; pragma synchronous = normal;\
pragma temp_store = memory; pragma mmap_size = 30000000000;"
        # spellfix1 SQLite extension providing editdist3() for fuzzy search().
        # NOTE(review): path is pinned to Python 3.11 dist-packages while the
        # shebang targets 3.12 — confirm this extension path is still valid.
        self.sqlite_exts: list[str] = ['/usr/local/lib/python3.11/dist-packages/spellfix1.cpython-311-x86_64-linux-gnu.so']
        # Label used in log messages to identify this search backend.
        self.label: str = "Cache"
|
2025-01-13 20:47:39 -05:00
|
|
|
|
2025-01-18 13:26:00 -05:00
|
|
|
def get_matched(self, matched_candidate: tuple, confidence: int,
|
|
|
|
sqlite_rows: list[sqlite3.Row] = None, redis_results: Any = None) -> Optional[LyricsResult]:
|
2025-01-19 07:01:07 -05:00
|
|
|
"""
|
|
|
|
Get Matched Result
|
|
|
|
Args:
|
|
|
|
matched_candidate (tuple): the correctly matched candidate returned by matcher.best_match
|
|
|
|
confidence (int): % confidence
|
|
|
|
sqlite_rows (list[sqlite3.Row]|None): List of returned rows from SQLite DB, or None if Redis
|
|
|
|
redis_results (Any): List of Redis returned data, or None if SQLite
|
|
|
|
Returns:
|
|
|
|
LyricsResult|None: The result, if found - None otherwise.
|
|
|
|
"""
|
2025-01-14 14:17:18 -05:00
|
|
|
matched_id: int = matched_candidate[0]
|
2025-01-18 13:26:00 -05:00
|
|
|
if redis_results:
|
2025-01-18 14:17:39 -05:00
|
|
|
for res in redis_results:
|
|
|
|
(key, row) = res
|
|
|
|
if key == matched_id:
|
|
|
|
logging.info("Matched row: %s", row)
|
2025-01-18 13:26:00 -05:00
|
|
|
return LyricsResult(
|
|
|
|
artist=row['artist'],
|
|
|
|
song=row['song'],
|
|
|
|
lyrics=row['lyrics'],
|
2025-01-18 14:46:05 -05:00
|
|
|
src=f"{row['src']} (redis cache, id: {key})",
|
2025-01-18 13:26:00 -05:00
|
|
|
confidence=row['confidence']
|
|
|
|
)
|
|
|
|
else:
|
|
|
|
for row in sqlite_rows:
|
|
|
|
if row[0] == matched_id:
|
|
|
|
(_id, artist, song, lyrics, original_src, _confidence) = row
|
|
|
|
return LyricsResult(
|
|
|
|
artist=artist,
|
|
|
|
song=song,
|
|
|
|
lyrics=lyrics,
|
|
|
|
src=f"{original_src} (cached, id: {_id})",
|
|
|
|
confidence=confidence)
|
2025-01-13 20:47:39 -05:00
|
|
|
return None
|
2025-01-15 20:17:49 -05:00
|
|
|
|
|
|
|
async def check_existence(self, artistsong: str) -> Optional[bool]:
|
|
|
|
"""
|
|
|
|
Check whether lyrics are already stored for track
|
2025-01-19 07:01:07 -05:00
|
|
|
Args:
|
|
|
|
artistsong (str): artist and song in artist\\nsong format
|
|
|
|
Returns:
|
|
|
|
bool: Whether track was found in cache
|
2025-01-15 20:17:49 -05:00
|
|
|
"""
|
|
|
|
logging.debug("Checking whether %s is already stored",
|
|
|
|
artistsong.replace("\n", " - "))
|
|
|
|
check_query = "SELECT id FROM lyrics WHERE artistsong LIKE ? LIMIT 1"
|
2025-01-15 20:19:45 -05:00
|
|
|
params = (f"%{artistsong}%",)
|
2025-01-15 20:17:49 -05:00
|
|
|
async with sqlite3.connect(self.cache_db, timeout=2) as db_conn:
|
2025-01-16 09:21:50 -05:00
|
|
|
async with await db_conn.executescript(self.cache_pre_query) as _db_cursor:
|
|
|
|
async with await db_conn.execute(check_query, params) as db_cursor:
|
|
|
|
result = await db_cursor.fetchone()
|
|
|
|
if result:
|
|
|
|
logging.debug("%s is already stored.",
|
|
|
|
artistsong.replace("\n", " - "))
|
|
|
|
return True
|
2025-01-15 20:17:49 -05:00
|
|
|
logging.debug("%s cleared to be stored.",
|
|
|
|
artistsong)
|
|
|
|
return False
|
|
|
|
|
|
|
|
async def store(self, lyr_result: LyricsResult) -> None:
|
|
|
|
"""
|
2025-01-19 07:39:04 -05:00
|
|
|
Store lyrics (SQLite, then Redis)
|
2025-01-19 07:01:07 -05:00
|
|
|
Args:
|
2025-01-19 07:39:04 -05:00
|
|
|
lyr_result (LyricsResult): the returned lyrics to cache
|
|
|
|
Returns: None
|
|
|
|
"""
|
|
|
|
|
|
|
|
sqlite_insert_id = await self.sqlite_store(lyr_result)
|
|
|
|
if sqlite_insert_id:
|
|
|
|
await self.redis_cache.redis_store(sqlite_insert_id, lyr_result)
|
|
|
|
|
|
|
|
|
|
|
|
async def sqlite_store(self, lyr_result: LyricsResult) -> int:
|
|
|
|
"""
|
|
|
|
Store lyrics to SQLite Cache
|
|
|
|
Args:
|
|
|
|
lyr_result (LyricsResult): the returned lyrics to cache
|
2025-01-19 07:01:07 -05:00
|
|
|
Returns:
|
2025-01-19 07:39:04 -05:00
|
|
|
int: the inserted row id
|
2025-01-15 20:17:49 -05:00
|
|
|
"""
|
|
|
|
|
|
|
|
logging.info("Storing %s",
|
|
|
|
f"{lyr_result.artist} - {lyr_result.song}")
|
|
|
|
|
|
|
|
if lyr_result.src.lower() == "cache":
|
|
|
|
logging.info("Skipping cache storage - returned LyricsResult originated from cache")
|
|
|
|
return
|
|
|
|
|
|
|
|
artistsong = f"{lyr_result.artist}\n{lyr_result.song}"
|
|
|
|
if await self.check_existence(artistsong):
|
|
|
|
logging.info("Skipping cache storage - %s is already stored.",
|
|
|
|
artistsong.replace("\n", " - "))
|
|
|
|
return
|
|
|
|
|
|
|
|
try:
|
|
|
|
lyrics = regex.sub(r'(<br>|\n|\r\n)', ' / ', lyr_result.lyrics.strip())
|
|
|
|
lyrics = regex.sub(r'\s{2,}', ' ', lyrics)
|
|
|
|
|
2025-01-15 20:21:19 -05:00
|
|
|
insert_query = "INSERT INTO lyrics (src, date_retrieved, artist, song, artistsong, confidence, lyrics)\
|
|
|
|
VALUES(?, ?, ?, ?, ?, ?, ?)"
|
2025-01-15 20:17:49 -05:00
|
|
|
params = (lyr_result.src, time.time(), lyr_result.artist,
|
|
|
|
lyr_result.song, artistsong, lyr_result.confidence, lyrics)
|
|
|
|
|
|
|
|
async with sqlite3.connect(self.cache_db, timeout=2) as db_conn:
|
2025-01-16 09:21:50 -05:00
|
|
|
async with await db_conn.executescript(self.cache_pre_query) as _db_cursor:
|
|
|
|
async with await db_conn.execute(insert_query, params) as _cursor:
|
|
|
|
await db_conn.commit()
|
2025-01-19 07:39:04 -05:00
|
|
|
logging.info("Stored %s to SQLite!", artistsong.replace("\n", " - "))
|
|
|
|
return _cursor.lastrowid
|
2025-01-15 20:17:49 -05:00
|
|
|
except:
|
|
|
|
logging.critical("Cache storage error!")
|
|
|
|
traceback.print_exc()
|
2025-01-19 07:39:04 -05:00
|
|
|
|
2025-01-15 20:17:49 -05:00
|
|
|
|
|
|
|
|
2025-01-16 09:37:50 -05:00
|
|
|
# pylint: disable=unused-argument
|
2025-01-16 07:14:36 -05:00
|
|
|
async def search(self, artist: str, song: str, **kwargs) -> Optional[LyricsResult]:
|
2025-01-13 20:47:39 -05:00
|
|
|
"""
|
2025-01-19 07:01:07 -05:00
|
|
|
Cache Search
|
|
|
|
Args:
|
|
|
|
artist: the artist to search
|
|
|
|
song: the song to search
|
2025-01-13 20:47:39 -05:00
|
|
|
Returns:
|
2025-01-19 07:01:07 -05:00
|
|
|
LyricsResult|None: The result, if found - None otherwise.
|
2025-01-13 20:47:39 -05:00
|
|
|
"""
|
2025-01-14 11:10:13 -05:00
|
|
|
try:
|
2025-01-16 09:37:50 -05:00
|
|
|
# pylint: enable=unused-argument
|
2025-01-14 14:17:18 -05:00
|
|
|
artist: str = artist.strip().lower()
|
|
|
|
song: str = song.strip().lower()
|
2025-01-18 13:26:00 -05:00
|
|
|
input_track: str = f"{artist} - {song}"
|
2025-01-18 14:17:39 -05:00
|
|
|
search_query = None
|
2025-01-14 18:37:49 -05:00
|
|
|
search_params: Optional[tuple] = None
|
|
|
|
random_search: bool = False
|
2025-01-15 20:17:49 -05:00
|
|
|
time_start: float = time.time()
|
2025-01-18 13:26:00 -05:00
|
|
|
matcher = utils.TrackMatcher()
|
2025-01-15 20:17:49 -05:00
|
|
|
|
2025-01-18 14:17:39 -05:00
|
|
|
if artist == "!" and song == "!":
|
|
|
|
random_search = True
|
|
|
|
search_query: str = 'SELECT id, artist, song, lyrics, src, confidence\
|
|
|
|
FROM lyrics ORDER BY RANDOM() LIMIT 1'
|
|
|
|
|
2025-01-14 11:13:39 -05:00
|
|
|
logging.info("Searching %s - %s on %s",
|
2025-01-18 13:26:00 -05:00
|
|
|
artist, song, self.label)
|
|
|
|
|
|
|
|
"""Check Redis First"""
|
|
|
|
|
|
|
|
logging.info("Checking redis cache for %s...",
|
|
|
|
f"{artist} - {song}")
|
|
|
|
redis_result = await self.redis_cache.search(artist=artist,
|
|
|
|
song=song)
|
|
|
|
|
|
|
|
if redis_result:
|
|
|
|
result_tracks: list = []
|
2025-01-18 14:17:39 -05:00
|
|
|
for returned in redis_result:
|
|
|
|
(key, track) = returned
|
|
|
|
result_tracks.append((key, f"{track['artist']} - {track['song']}"))
|
2025-01-18 13:26:00 -05:00
|
|
|
|
2025-01-18 14:17:39 -05:00
|
|
|
if not random_search:
|
|
|
|
best_match: tuple|None = matcher.find_best_match(input_track=input_track,
|
2025-01-18 13:26:00 -05:00
|
|
|
candidate_tracks=result_tracks)
|
2025-01-18 14:17:39 -05:00
|
|
|
else:
|
|
|
|
best_match = (result_tracks[0], 100)
|
|
|
|
|
|
|
|
|
2025-01-18 14:46:05 -05:00
|
|
|
if best_match:
|
|
|
|
(candidate, confidence) = best_match
|
|
|
|
matched = self.get_matched(redis_results=redis_result, matched_candidate=candidate,
|
2025-01-18 13:26:00 -05:00
|
|
|
confidence=confidence)
|
2025-01-18 14:46:05 -05:00
|
|
|
|
|
|
|
if matched:
|
|
|
|
time_end: float = time.time()
|
|
|
|
time_diff: float = time_end - time_start
|
|
|
|
matched.confidence = confidence
|
|
|
|
matched.time = time_diff
|
|
|
|
|
|
|
|
logging.info("Found %s on redis cache, skipping SQLite...",
|
|
|
|
f"{artist} - {song}")
|
|
|
|
return matched
|
2025-01-18 13:26:00 -05:00
|
|
|
|
|
|
|
"""SQLite: Fallback"""
|
|
|
|
|
2025-01-14 11:10:13 -05:00
|
|
|
async with sqlite3.connect(self.cache_db, timeout=2) as db_conn:
|
|
|
|
await db_conn.enable_load_extension(True)
|
|
|
|
for ext in self.sqlite_exts:
|
|
|
|
await db_conn.load_extension(ext)
|
|
|
|
async with await db_conn.executescript(self.cache_pre_query) as _db_cursor:
|
2025-01-18 14:17:39 -05:00
|
|
|
if not random_search:
|
|
|
|
search_query: str = 'SELECT id, artist, song, lyrics, src, confidence FROM lyrics\
|
2025-01-15 20:17:49 -05:00
|
|
|
WHERE editdist3((lower(artist) || " " || lower(song)), (? || " " || ?))\
|
|
|
|
<= 410 ORDER BY editdist3((lower(artist) || " " || lower(song)), ?) ASC LIMIT 10'
|
2025-01-18 14:17:39 -05:00
|
|
|
search_params: tuple = (artist.strip(), song.strip(),
|
|
|
|
f"{artist.strip()} {song.strip()}")
|
|
|
|
|
2025-01-14 11:10:13 -05:00
|
|
|
async with await _db_cursor.execute(search_query, search_params) as db_cursor:
|
2025-01-14 14:17:18 -05:00
|
|
|
results: list = await db_cursor.fetchall()
|
|
|
|
result_tracks: list = []
|
2025-01-14 11:10:13 -05:00
|
|
|
for track in results:
|
|
|
|
(_id, _artist, _song, _lyrics, _src, _confidence) = track
|
|
|
|
result_tracks.append((_id, f"{_artist} - {_song}"))
|
2025-01-14 18:37:49 -05:00
|
|
|
if not random_search:
|
|
|
|
best_match: tuple|None = matcher.find_best_match(input_track=input_track,
|
2025-01-14 11:10:13 -05:00
|
|
|
candidate_tracks=result_tracks)
|
2025-01-14 18:37:49 -05:00
|
|
|
else:
|
2025-01-17 06:41:56 -05:00
|
|
|
best_match = (result_tracks[0], 100)
|
2025-01-14 11:10:13 -05:00
|
|
|
if not best_match:
|
|
|
|
return None
|
|
|
|
(candidate, confidence) = best_match
|
|
|
|
logging.info("Result found on %s", self.label)
|
2025-01-15 20:17:49 -05:00
|
|
|
matched = self.get_matched(sqlite_rows=results,
|
2025-01-14 11:10:13 -05:00
|
|
|
matched_candidate=candidate,
|
|
|
|
confidence=confidence)
|
2025-01-15 20:17:49 -05:00
|
|
|
time_end: float = time.time()
|
|
|
|
time_diff: float = time_end - time_start
|
|
|
|
matched.time = time_diff
|
|
|
|
return matched
|
2025-01-14 11:10:13 -05:00
|
|
|
except:
|
2025-01-18 13:26:00 -05:00
|
|
|
traceback.print_exc()
|
2025-01-16 09:21:50 -05:00
|
|
|
return
|