api/lyric_search/sources/redis_cache.py

import logging
import traceback
import json
import time
import sys
import regex
from regex import Pattern
import asyncio
from typing import Union, Optional
sys.path.insert(1, '..')
from lyric_search import notifier
from lyric_search.constructors import LyricsResult
import redis.asyncio as redis
from redis.commands.search.query import Query # type: ignore
from redis.commands.search.indexDefinition import IndexDefinition, IndexType # type: ignore
from redis.commands.search.field import TextField, TagField # type: ignore
from redis.commands.json.path import Path # type: ignore
from . import private
logger = logging.getLogger()
log_level = logging.getLevelName(logger.level)


class RedisException(Exception):
    """
    Redis Exception
    """


class RedisCache:
    """
    Redis Cache Methods
    """

    def __init__(self) -> None:
        self.redis_client: redis.Redis = redis.Redis(password=private.REDIS_PW)
        self.notifier = notifier.DiscordNotifier()
        self.notify_warnings = False
        self.regexes: list[Pattern] = [
            regex.compile(r'\-'),
            regex.compile(r'[^a-zA-Z0-9\s]'),
        ]
        try:
            asyncio.get_event_loop().create_task(self.create_index())
        except Exception as e:
            logging.debug("Failed to create redis create_index task: %s",
                          str(e))
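
    # NOTE: create_index() above is scheduled as a fire-and-forget task on the
    # current event loop; if RedisCache() is constructed while no loop is
    # running, index creation is skipped and only logged at DEBUG level.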

    async def create_index(self) -> None:
        """Create Index"""
        try:
            schema = (
                TextField("$.search_artist", as_name="artist"),
                TextField("$.search_song", as_name="song"),
                TextField("$.src", as_name="src"),
                TextField("$.lyrics", as_name="lyrics")
            )
            result = await self.redis_client.ft().create_index(
                schema, definition=IndexDefinition(prefix=["lyrics:"],
                                                   index_type=IndexType.JSON))
            if str(result) != "OK":
                raise RedisException(f"Redis: Failed to create index: {result}")
        except Exception as e:
            logging.debug("Failed to create redis index: %s",
                          str(e))
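
    # For reference, the schema above is roughly equivalent to the following raw
    # RediSearch command (illustrative; redis-py's ft() defaults the index name
    # to "idx"):
    #   FT.CREATE idx ON JSON PREFIX 1 "lyrics:" SCHEMA
    #       $.search_artist AS artist TEXT $.search_song AS song TEXT
    #       $.src AS src TEXT $.lyrics AS lyrics TEXT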

    def sanitize_input(self, artist: str, song: str,
                       fuzzy: Optional[bool] = False) -> tuple[str, str]:
        """
        Sanitize artist/song input (convert to a Redis-matchable fuzzy query)
        Args:
            artist: Input artist
            song: Input song
            fuzzy: Whether to create a fuzzy query str
        Returns:
            tuple[str, str]: Tuple containing the two output strings (artist, song)
        """
        artist = self.regexes[0].sub("", artist)
        artist = self.regexes[1].sub("", artist).strip()
        song = self.regexes[0].sub("", song)
        song = self.regexes[1].sub("", song).strip()
        if fuzzy:
            artist = " ".join([f"(%{artist_word}%)" for artist_word in artist.split(" ")])
            song = " ".join([f"(%{song_word}%)" for song_word in song.split(" ")])
        return (artist, song)
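
    # Illustrative example (hypothetical inputs): sanitize_input("A-ha", "Take On Me!")
    # returns ("Aha", "Take On Me"); with fuzzy=True it instead returns
    # ("(%Aha%)", "(%Take%) (%On%) (%Me%)").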

    async def increment_found_count(self, src: str) -> None:
        """
        Increment the found count for a source
        Args:
            src (str): The source to increment
        Returns:
            None
        """
        try:
            src = src.strip().lower()
            await self.redis_client.incr(f"returned:{src}")
        except Exception as e:
            file = __file__.rsplit("/", maxsplit=1)[-1]
            await self.notifier.send(f"ERROR @ {file}", str(e))
            traceback.print_exc()
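
    # Counters live under keys of the form "returned:<src>", e.g. "returned:genius",
    # and are read back by get_found_counts() below.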

    async def get_found_counts(self) -> Optional[dict]:
        """
        Get found counts for all sources (and the failed count)
        Args:
            None
        Returns:
            dict: In the form {'source': count, 'source2': count, ...}
        """
        try:
            sources: list = ["cache", "lrclib", "genius", "failed"]
            counts: dict[str, int] = {}
            for src in sources:
                src_found_count = await self.redis_client.get(f"returned:{src}")
                if src_found_count is None:
                    return None
                counts[src] = int(src_found_count)  # Redis returns bytes
            return counts
        except Exception as e:
            file = __file__.rsplit("/", maxsplit=1)[-1]
            await self.notifier.send(f"ERROR @ {file}", str(e))
            traceback.print_exc()
            return None
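
    # Example return value (hypothetical counts):
    #   {"cache": 1042, "lrclib": 317, "genius": 89, "failed": 12}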

    async def search(self, artist: Optional[str] = None,
                     song: Optional[str] = None,
                     lyrics: Optional[str] = None) -> Optional[list[tuple]]:
        """
        Search Redis Cache
        Args:
            artist (Optional[str]): artist to search
            song (Optional[str]): song to search
            lyrics (Optional[str]): lyrics to search (optional, used in place of artist/song if provided)
        Returns:
            list[tuple]: List of Redis results; each tuple's first value is the Redis key
            (without the "lyrics:" prefix), the second is the stored data
        """
        try:
            fuzzy_artist = None
            fuzzy_song = None
            is_random_search = artist == "!" and song == "!"
            if lyrics:
                # to code later
                raise RedisException("Lyric search not yet implemented")
            if not is_random_search:
                logging.debug("Redis: Searching normally first")
                if not artist or not song:
                    logging.info("redis_cache:: search failed: No artist or song provided.")
                    return None
                (artist, song) = self.sanitize_input(artist, song)
                logging.debug("Seeking: %s - %s", artist, song)
                search_res: Union[dict, list] = await self.redis_client.ft().search(Query(  # type: ignore
                    f"@artist:{artist} @song:{song}"
                ))
                search_res_out: list[tuple] = [
                    (result['id'].split(":", maxsplit=1)[1],
                     dict(json.loads(result['json'])))
                    for result in search_res.docs]  # type: ignore
                if not search_res_out:
                    logging.debug("Redis: Normal search failed, trying with fuzzy search")
                    short_artist = " ".join(artist.split(" ")[0:5])
                    short_song = " ".join(song.split(" ")[0:5])
                    (fuzzy_artist, fuzzy_song) = self.sanitize_input(artist=short_artist.strip(),
                                                                     song=short_song.strip(), fuzzy=True)
                    search_res = await self.redis_client.ft().search(Query(  # type: ignore
                        f"@artist:{fuzzy_artist} @song:{fuzzy_song}"
                    ))
                    search_res_out = [
                        (result['id'].split(":", maxsplit=1)[1],
                         dict(json.loads(result['json'])))
                        for result in search_res.docs]  # type: ignore
            else:
                random_redis_key: str = await self.redis_client.randomkey()
                out_id: str = str(random_redis_key).split(":",
                                                          maxsplit=1)[1][:-1]
                search_res = await self.redis_client.json().get(random_redis_key)
                search_res_out = [(out_id, search_res)]
            if not search_res_out and self.notify_warnings:
                await self.notifier.send("WARNING", f"Redis cache miss for: \n## *{artist} - {song}*")
            return search_res_out
        except Exception as e:
            traceback.print_exc()
            # await self.notifier.send(f"ERROR @ {__file__.rsplit("/", maxsplit=1)[-1]}", f"{str(e)}\nSearch was: {artist} - {song}; fuzzy: {fuzzy_artist} - {fuzzy_song}")
            return None
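
    # For illustration (hypothetical values): a normal query string looks like
    # "@artist:Aha @song:Take On Me", while the fuzzy fallback produces something like
    # "@artist:(%Aha%) @song:(%Take%) (%On%) (%Me%)"; a non-empty result resembles
    # [("000123", {"artist": "a-ha", "song": "Take On Me", "lyrics": "...", ...})].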

    async def redis_store(self, sqlite_id: int,
                          lyr_result: LyricsResult) -> None:
        """
        Store lyrics to redis cache
        Args:
            sqlite_id (int): the row id of the related SQLite db insertion
            lyr_result (LyricsResult): the returned lyrics to cache
        Returns:
            None
        """
        try:
            (search_artist, search_song) = self.sanitize_input(lyr_result.artist,
                                                               lyr_result.song)
            redis_mapping: dict = {
                'id': sqlite_id,
                'src': lyr_result.src,
                'date_retrieved': time.time(),
                'artist': lyr_result.artist,
                'search_artist': search_artist,
                'search_song': search_song,
                'search_artistsong': f'{search_artist}\n{search_song}',
                'song': lyr_result.song,
                'artistsong': f"{lyr_result.artist}\n{lyr_result.song}",
                'confidence': lyr_result.confidence,
                'lyrics': lyr_result.lyrics,
                'tags': '(none)',
                'liked': 0,
            }
            newkey: str = f"lyrics:000{sqlite_id}"
            jsonset: bool = await self.redis_client.json().set(newkey, Path.root_path(),
                                                               redis_mapping)
            if not jsonset:
                raise RedisException(f"Failed to store {lyr_result.artist} - {lyr_result.song} (SQLite id: {sqlite_id}) to redis:\n{jsonset}")
            logging.info("Stored %s - %s (related SQLite Row ID: %s) to %s",
                         lyr_result.artist, lyr_result.song, sqlite_id, newkey)
            await self.notifier.send("INFO",
                                     f"Stored {lyr_result.artist} - {lyr_result.song} (related SQLite Row ID: {sqlite_id}) to redis: {newkey}")
        except Exception as e:
            file = __file__.rsplit("/", maxsplit=1)[-1]
            await self.notifier.send(f"ERROR @ {file}",
                                     f"Failed to store {lyr_result.artist} - {lyr_result.song} "
                                     f"(SQLite id: {sqlite_id}) to Redis:\n{str(e)}")
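
# Minimal usage sketch (illustrative only; assumes a running Redis instance with
# the RediSearch and RedisJSON modules loaded, and a valid password in private.REDIS_PW):
#
#   async def _demo() -> None:
#       cache = RedisCache()
#       results = await cache.search(artist="A-ha", song="Take On Me")
#       if results:
#           redis_key, doc = results[0]
#           print(redis_key, doc.get("src"), doc.get("lyrics", "")[:80])
#
#   # asyncio.run(_demo())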