api/endpoints/lyric_search.py

417 lines
17 KiB
Python
Raw Normal View History

2024-08-10 22:49:00 -04:00
#!/usr/bin/env python3.12
2025-01-11 20:59:10 -05:00
# pylint: disable=bare-except, broad-exception-raised, broad-exception-caught
2024-08-10 22:49:00 -04:00
import importlib
2025-01-11 20:59:10 -05:00
import traceback
import logging
2025-01-14 20:22:12 -05:00
import os
2024-08-10 22:49:00 -04:00
import urllib.parse
2025-01-19 07:01:07 -05:00
from typing import Optional
2024-08-10 22:49:00 -04:00
import regex
2024-11-29 15:33:12 -05:00
import aiohttp
2025-01-14 20:22:12 -05:00
import aiosqlite as sqlite3
2024-08-11 13:49:07 -04:00
from fastapi import FastAPI, HTTPException
2025-01-13 20:47:39 -05:00
from pydantic import BaseModel
from lyric_search_new.sources import aggregate
2024-08-10 22:49:00 -04:00
2024-08-11 07:42:47 -04:00
class ValidLyricRequest(BaseModel):
    """
    Request body for the lyric search endpoints.

    - **a**: artist
    - **s**: song
    - **t**: track (artist and song combined) [used only if a & s are not used]
    - **extra**: include extra details in response [optional, default: false]
    - **lrc**: Request LRCs?
    - **sub**: text to search within lyrics, if found lyrics will begin at found verse [optional]
    - **src**: the script/utility which initiated the request
    - **excluded_sources**: sources to exclude (new only)
    """

    # Search terms: either a + s together, or t on its own.
    a: str | None = None
    s: str | None = None
    t: str | None = None

    # Optional phrase to seek to within the located lyrics.
    sub: str | None = None

    # Response-shaping switches.
    extra: bool | None = False
    lrc: bool | None = False

    # The only required field: identifies the requesting client.
    src: str

    # Forwarded to the aggregate search as its exclusion list.
    excluded_sources: list | None = None

    class Config:  # pylint: disable=missing-class-docstring too-few-public-methods
        # Example payload attached to the model's schema.
        schema_extra = {
            "example": {
                "a": "eminem",
                "s": "rap god",
                "src": "WEB",
                "extra": True,
                "lrc": False,
            }
        }
2025-01-14 20:22:12 -05:00
class ValidTypeAheadRequest(BaseModel):
    """
    Request body for the typeahead endpoints.

    - **pre_query**: artist to narrow song suggestions by [song typeahead only]
    - **query**: query string
    """

    # Only read by the song typeahead handler, where it filters on artist.
    pre_query: str | None = None
    # Text typed so far; matched against artist or song names.
    query: str
2024-08-11 09:50:41 -04:00
2024-08-13 19:50:02 -04:00
class ValidLyricSearchLogRequest(BaseModel):
    """
    Request body for the lyric search history endpoint.

    - **webradio**: whether or not to include requests generated automatically by the radio page on codey.lol, defaults to False
    """

    # Passed straight through to the lyrics engine's history lookup.
    webradio: bool = False
2025-01-14 20:22:12 -05:00
class CacheUtils:
"""Lyrics Cache DB Utils"""
def __init__(self):
self.lyrics_db_path = os.path.join("/", "var", "lib",
"singerdbs", "cached_lyrics.db")
async def check_typeahead(self, s: str, pre_query: str | None = None):
"""Check s against artists stored - for typeahead"""
async with sqlite3.connect(self.lyrics_db_path,
timeout=2) as db_conn:
db_conn.row_factory = lambda c, r: dict([(col[0], r[idx]) for idx, col in enumerate(c.description)])
if not pre_query:
query = "SELECT distinct(artist) FROM lyrics WHERE artist LIKE ? LIMIT 15"
query_params = (f"%{s}%",)
else:
query = "SELECT distinct(song) FROM lyrics WHERE artist LIKE ? AND song LIKE ? LIMIT 15"
query_params = (f"%{pre_query}%", f"%{s}%",)
async with db_conn.execute(query, query_params) as db_cursor:
return await db_cursor.fetchall()
2024-08-10 22:49:00 -04:00
class LyricSearch(FastAPI):
    """Lyric Search Endpoint"""

    def __init__(self, app: FastAPI, util, constants, glob_state):  # pylint: disable=super-init-not-called
        """
        Store collaborators and register every handler as a POST route on app.

        - app: FastAPI application the routes are added to
        - util / constants / glob_state: shared project objects; this class
          reads constants.DOUBLE_SPACE_REGEX and uses glob_state for counters
        """
        self.app = app
        self.util = util
        self.constants = constants
        self.glob_state = glob_state
        self.cache_utils = CacheUtils()
        self.lyrics_engine = importlib.import_module("lyrics_engine").LyricsEngine()
        self.endpoint_name = "lyric_search"
        self.endpoint2_name = "lyric_cache_list"
        self.endpoints = {
            "typeahead/artist": self.artist_typeahead_handler,
            "typeahead/song": self.song_typeahead_handler,
            # "lyric_search": self.lyric_search_handler,
            "lyric_search": self.new_test,  # test
            "lyric_cache_list": self.lyric_cache_list_handler,
            "lyric_search_history": self.lyric_search_log_handler,
            "lyric_search_test": self.new_test,
        }
        self.acceptable_request_sources = [
            "WEB",
            "WEB-RADIO",
            "IRC-MS",
            "IRC-FS",
            "IRC-KALI",
            "DISC-ACES",
            "DISC-HAVOC",
            "IRC-SHARED",
        ]
        # Captures the mm:ss part of an LRC time tag and the text that follows.
        self.lrc_regex = regex.compile(r'\[([0-9]{2}:[0-9]{2})\.[0-9]{1,3}\](\s(.*)){0,}')
        for endpoint, handler in self.endpoints.items():
            app.add_api_route(f"/{endpoint}/", handler, methods=["POST"])

    async def lyric_cache_list_handler(self):
        """
        Get currently cached lyrics entries
        """
        return {
            'err': False,
            'data': await self.lyrics_engine.listCacheEntries(),
        }

    async def artist_typeahead_handler(self, data: ValidTypeAheadRequest):
        """Artist Type Ahead Handler"""
        if not isinstance(data.query, str) or len(data.query) < 2:
            return {
                'err': True,
                'errorText': 'Invalid request',
            }
        typeahead_result = await self.cache_utils.check_typeahead(data.query)
        return [str(row.get('artist')) for row in typeahead_result]

    async def song_typeahead_handler(self, data: ValidTypeAheadRequest):
        """Song Type Ahead Handler"""
        if not isinstance(data.pre_query, str) or len(data.pre_query) < 2 \
                or not isinstance(data.query, str) or len(data.query) < 2:
            return {
                'err': True,
                'errorText': 'Invalid request',
            }
        typeahead_result = await self.cache_utils.check_typeahead(data.query, data.pre_query)
        return [str(row.get('song')) for row in typeahead_result]

    async def lyric_search_log_handler(self, data: ValidLyricSearchLogRequest):
        """Lyric Search Log Handler"""
        include_radio = data.webradio
        await self.glob_state.increment_counter('lyrichistory_requests')
        last_10k_sings = await self.lyrics_engine.getHistory(limit=10000, webradio=include_radio)
        return {
            'err': False,
            'history': last_10k_sings,
        }

    async def new_test(self, data: ValidLyricRequest):
        """
        Search for lyrics (testing)

        - **a**: artist
        - **s**: song
        - **t**: track (artist and song combined) [used only if a & s are not used] [unused]
        - **extra**: include extra details in response [optional, default: false] [unused]
        - **lrc**: Request LRCs?
        - **sub**: text to search within lyrics, if found lyrics will begin at found verse [optional, default: none]
        - **src**: the script/utility which initiated the request
        - **excluded_sources**: sources to exclude
        """
        # NOTE(review): client-side validation failures are reported as 500
        # here (the older handler uses 403); kept as-is for existing clients.
        if not data.a or not data.s or not data.src:
            raise HTTPException(detail="Invalid request", status_code=500)
        if data.src.upper() not in self.acceptable_request_sources:
            raise HTTPException(detail="Invalid request", status_code=500)

        search_artist: Optional[str] = data.a
        search_song: Optional[str] = data.s
        if search_artist and search_song:
            # Trim, collapse repeated spaces, then undo URL escaping.
            search_artist = self.constants.DOUBLE_SPACE_REGEX.sub(" ", search_artist.strip())
            search_song = self.constants.DOUBLE_SPACE_REGEX.sub(" ", search_song.strip())
            search_artist = urllib.parse.unquote(search_artist)
            search_song = urllib.parse.unquote(search_song)

        excluded_sources = data.excluded_sources
        aggregate_search = aggregate.Aggregate(exclude_methods=excluded_sources)
        plain_lyrics = not data.lrc
        result = await aggregate_search.search(search_artist, search_song, plain_lyrics)
        if not result:
            return {
                'err': True,
                'errorText': 'Sources exhausted, lyrics not located.',
            }

        result = result.todict()
        if data.sub and not data.lrc:
            seeked_found_line = None
            wanted = data.sub.strip().lower()  # loop-invariant, computed once
            lyric_lines = result['lyrics'].strip().split(" / ")
            for i, line in enumerate(lyric_lines):
                # Drop U+2064 characters before comparing.
                line = regex.sub(r'\u2064', '', line.strip())
                if wanted in line.strip().lower():
                    seeked_found_line = i
                    logging.debug("Found %s at %s, match for %s!",
                                  line, seeked_found_line, data.sub)  # REMOVEME: DEBUG
                    break

            # Compare against None explicitly: index 0 is a valid match and
            # must not be mistaken for "nothing found".
            if seeked_found_line is None:
                return {
                    'failed_seek': True,
                }
            result['lyrics'] = " / ".join(lyric_lines[seeked_found_line:])

        result['confidence'] = int(result.get('confidence', 0))
        result['time'] = f"{float(result['time']):.4f}"
        if plain_lyrics:
            result['lyrics'] = regex.sub(r'(\s/\s|\n)', '<br>', result['lyrics']).strip()
        else:
            # Swap lyrics key for 'lrc'
            result['lrc'] = result.pop('lyrics')
        if "cache" in result['src']:
            result['from_cache'] = True
        # TODO: REMOVE BELOW AFTER TESTING IS DONE
        # if not data.extra:
        #     result.pop('src')
        return result

    def _parse_lrc(self, lrc_content):
        """Convert raw LRC text into a list of {'timeTag', 'words'} dicts."""
        parsed = []
        for raw_line in lrc_content.split("\n"):
            stripped = raw_line.strip()
            if not stripped:
                continue
            matches = regex.findall(self.lrc_regex, stripped)
            if not matches:
                # Not a timestamped lyric line; skip it.
                continue
            time_tag, words = matches[0][0], matches[0][1]
            parsed.append({
                "timeTag": time_tag,
                "words": words if words.strip() else "",
            })
        return parsed

    async def _lrclib_fallback(self, search_artist, search_song):
        """Fetch synced lyrics from LRCLib; return a response dict either way."""
        try:
            lrclib_api_url = "https://lrclib.net/api/get"
            sane_artist = urllib.parse.quote_plus(search_artist)
            sane_track = urllib.parse.quote_plus(search_song)
            async with aiohttp.ClientSession() as session:
                async with session.get(f"{lrclib_api_url}?artist_name={sane_artist}&track_name={sane_track}") as request:
                    request.raise_for_status()
                    response_json = await request.json()
            if "syncedLyrics" not in response_json:
                raise ValueError("LRCLib Fallback Failed")
            lrc_content = response_json.get('syncedLyrics')
            returned_artist = response_json.get('artistName')
            returned_song = response_json.get('trackName')
            logging.debug("Synced Lyrics [LRCLib]: %s",
                          lrc_content)
            return {
                'err': False,
                'artist': returned_artist,
                'song': returned_song,
                'combo_lev': "N/A",
                'lrc': self._parse_lrc(lrc_content),
                'from_cache': False,
                'src': 'Alt LRC SRC',
                'reqn': await self.glob_state.get_counter('lyric_requests'),
            }
        except Exception:
            # Deliberately Exception rather than a bare except: task
            # cancellation (asyncio.CancelledError) and interpreter shutdown
            # signals derive from BaseException and must keep propagating.
            traceback.print_exc()
            return {
                'err': True,
                'errorText': 'Search failed!',
            }

    async def lyric_search_handler(self, data: ValidLyricRequest):
        """
        Search for lyrics

        - **a**: artist
        - **s**: song
        - **t**: track (artist and song combined) [used only if a & s are not used]
        - **extra**: include extra details in response [optional, default: false]
        - **lrc**: Request LRCs?
        - **sub**: text to search within lyrics, if found lyrics will begin at found verse [optional, default: none]
        - **src**: the script/utility which initiated the request
        """
        lrc = data.lrc
        src = data.src.upper()
        if src not in self.acceptable_request_sources:
            raise HTTPException(detail="Invalid request source", status_code=403)
        await self.glob_state.increment_counter('lyric_requests')

        search_artist = data.a
        search_song = data.s
        search_text = data.t
        add_extras = data.extra
        sub_search = data.sub
        # "!" for both artist and song requests a random song.
        random_song_requested = (search_artist == "!" and search_song == "!")
        query_valid = (
            search_artist is not None and
            search_song is not None and
            len(search_artist) >= 1 and
            len(search_song) >= 1 and
            len(search_artist) + len(search_song) >= 3
        )
        if not random_song_requested and (not search_text and not query_valid):
            return {
                "err": True,
                "errorText": "Invalid parameters",
            }

        if search_artist and search_song:
            search_artist = self.constants.DOUBLE_SPACE_REGEX.sub(" ", search_artist.strip())
            search_song = self.constants.DOUBLE_SPACE_REGEX.sub(" ", search_song.strip())
            search_artist = urllib.parse.unquote(search_artist)
            search_song = urllib.parse.unquote(search_song)

        if search_text is None:
            query_text = f"{search_artist} : {search_song}"
            if sub_search:
                sub_search = regex.sub(r'\s{2,}', ' ', sub_search.strip())
                query_text = f"{search_artist} : {search_song} : {sub_search}"
        else:
            query_text = str(search_text)
        search_object = self.lyrics_engine.create_query_object(query_text)

        if lrc:
            search_worker = await self.lyrics_engine.grabFromSpotify(searching=search_object,
                                                                     lrc=True)
            spotify_lines = (search_worker.get('l') if search_worker else None) or []
            # Synced means at least one line carries a non-zero time tag.
            spotify_lyrics_unsynced = not any(
                line.get('timeTag') and line.get('timeTag') != "00:00.00"
                for line in spotify_lines
            )
            if not search_worker or spotify_lyrics_unsynced:
                # Try LRCLib before failing out
                return await self._lrclib_fallback(search_artist, search_song)
            return {
                'err': False,
                'artist': search_worker['artist'],
                'song': search_worker['song'],
                'combo_lev': search_worker['combo_lev'],
                'lrc': search_worker['l'],
                'from_cache': False,
                'src': search_worker['method'],
                'reqn': await self.glob_state.get_counter('lyric_requests'),
            }

        search_worker = await self.lyrics_engine.lyrics_worker(searching=search_object)
        if not search_worker or 'l' not in search_worker.keys():
            await self.glob_state.increment_counter('failedlyric_requests')
            return {
                'err': True,
                'errorText': 'Sources exhausted, lyrics not located.',
            }

        await self.lyrics_engine.storeHistEntry(artist=search_worker.get('artist'),
                                                song=search_worker.get('song'),
                                                retr_method=search_worker.get('method'),
                                                request_src=src.strip())
        return {
            'err': False,
            'artist': search_worker['artist'],
            'song': search_worker['song'],
            'combo_lev': f"{search_worker['combo_lev']:.2f}",
            'lyrics': regex.sub(r"\s/\s", "<br>", " ".join(search_worker['l'])),
            'from_cache': search_worker['method'].strip().lower().startswith("local cache"),
            'src': search_worker['method'] if add_extras else None,
            'reqn': await self.glob_state.get_counter('lyric_requests'),
        }