api/endpoints/lyric_search.py

200 lines
8.1 KiB
Python
Raw Normal View History

2024-08-10 22:49:00 -04:00
#!/usr/bin/env python3.12
2025-01-11 20:59:10 -05:00
import logging
2025-01-14 20:22:12 -05:00
import os
2024-08-10 22:49:00 -04:00
import urllib.parse
import regex
2025-01-14 20:22:12 -05:00
import aiosqlite as sqlite3
2024-08-11 13:49:07 -04:00
from fastapi import FastAPI, HTTPException
2025-02-15 21:09:33 -05:00
from fastapi.responses import JSONResponse
from typing import LiteralString, Optional, Union
2025-02-14 16:07:24 -05:00
from regex import Pattern
2025-02-11 11:19:52 -05:00
from .constructors import ValidTypeAheadRequest, ValidLyricRequest
2025-02-11 20:01:07 -05:00
from lyric_search.constructors import LyricsResult
from lyric_search.sources import aggregate
from lyric_search import notifier
2024-08-10 22:49:00 -04:00
2025-01-14 20:22:12 -05:00
class CacheUtils:
2025-02-15 21:09:33 -05:00
"""
Lyrics Cache DB Utils
"""
2025-02-14 16:07:24 -05:00
def __init__(self) -> None:
self.lyrics_db_path: LiteralString = os.path.join("/usr/local/share",
2025-01-24 19:26:07 -05:00
"sqlite_dbs", "cached_lyrics.db")
2025-01-14 20:22:12 -05:00
2025-02-15 21:09:33 -05:00
async def check_typeahead(self, s: str,
pre_query: Optional[str] = None) -> list[dict]:
"""
Check s against artists stored - for typeahead
"""
2025-01-14 20:22:12 -05:00
async with sqlite3.connect(self.lyrics_db_path,
timeout=2) as db_conn:
2025-02-14 16:07:24 -05:00
db_conn.row_factory = sqlite3.Row
2025-01-14 20:22:12 -05:00
if not pre_query:
2025-02-11 20:01:07 -05:00
query: str = "SELECT distinct(artist) FROM lyrics WHERE artist LIKE ? LIMIT 15"
query_params: tuple = (f"%{s}%",)
2025-01-14 20:22:12 -05:00
else:
2025-02-14 16:07:24 -05:00
query = "SELECT distinct(song) FROM lyrics WHERE artist LIKE ? AND song LIKE ? LIMIT 15"
query_params = (f"%{pre_query}%", f"%{s}%",)
2025-01-23 13:02:03 -05:00
async with await db_conn.execute(query, query_params) as db_cursor:
2025-01-14 20:22:12 -05:00
return await db_cursor.fetchall()
2024-08-10 22:49:00 -04:00
class LyricSearch(FastAPI):
    """
    Lyric Search Endpoint
    """

    def __init__(self, app: FastAPI,
                 util, constants) -> None:
        """
        Store collaborators and register this endpoint's POST routes on app

        Args:
            app: application the routes are added to
            util: shared utility object (stored on the instance)
            constants: shared constants object; DOUBLE_SPACE_REGEX is read
                by lyric_search_handler
        """
        # NOTE(review): FastAPI.__init__ is intentionally left uncalled, as
        # before; routes are registered on the `app` passed in, not on self.
        self.app: FastAPI = app
        self.util = util
        self.constants = constants
        self.cache_utils = CacheUtils()
        self.notifier = notifier.DiscordNotifier()

        # Route path (without leading slash) -> handler coroutine.
        self.endpoints: dict = {
            "typeahead/artist": self.artist_typeahead_handler,
            "typeahead/song": self.song_typeahead_handler,
            "lyric_search": self.lyric_search_handler,  # Preserving old endpoint path temporarily
            "lyric/search": self.lyric_search_handler,
        }

        # Values of `src` (compared upper-cased) allowed to request lyrics.
        self.acceptable_request_sources: list = [
            "WEB",
            "WEB-RADIO",
            "DISC-HAVOC",
            "LIMNORIA-SHARED",
            "IRC-SHARED",
        ]

        # Matches an LRC line: "[mm:ss.xxx]" optionally followed by text.
        self.lrc_regex: Pattern = regex.compile(r'\[([0-9]{2}:[0-9]{2})\.[0-9]{1,3}\](\s(.*)){0,}')

        for endpoint, handler in self.endpoints.items():
            # Only the current search path is exposed in the OpenAPI schema.
            _schema_include = endpoint in ["lyric/search"]
            app.add_api_route(f"/{endpoint}", handler, methods=["POST"], include_in_schema=_schema_include)

    @staticmethod
    def _split_track(track: str) -> tuple[Optional[str], Optional[str]]:
        """
        Split a combined "artist - song" string on the first " - "

        Returns (None, None) when the separator is absent, so the caller
        can answer with its normal "Invalid request" payload instead of
        failing on tuple unpacking.
        """
        artist, separator, song = track.partition(" - ")
        if not separator:
            return (None, None)
        return (artist, song)

    @staticmethod
    def _find_seek_line(lyric_lines: list[str], sub: str) -> Optional[int]:
        """
        Return the index of the first lyric line containing sub
        (case-insensitive, U+2064 characters removed), or None if no
        line matches.
        """
        needle: str = sub.strip().lower()  # loop-invariant, computed once
        for i, line in enumerate(lyric_lines):
            line = regex.sub(r'\u2064', '', line.strip())
            if needle in line.strip().lower():
                logging.debug("Found %s at %s, match for %s!",
                              line, i, sub)  # REMOVEME: DEBUG
                return i
        return None

    async def artist_typeahead_handler(self, data: ValidTypeAheadRequest) -> JSONResponse:
        """
        Artist Type Ahead Handler
        - **query**: The query
        """
        # Queries shorter than two characters are rejected.
        if not isinstance(data.query, str) or len(data.query) < 2:
            return JSONResponse(status_code=500, content={
                'err': True,
                'errorText': 'Invalid request',
            })
        query: str = data.query
        typeahead_result: list[dict] = await self.cache_utils.check_typeahead(query)
        typeahead_list: list[str] = [str(r['artist']) for r in typeahead_result]
        return JSONResponse(content=typeahead_list)

    async def song_typeahead_handler(self, data: ValidTypeAheadRequest) -> JSONResponse:
        """
        Song Type Ahead Handler
        - **query**: The query
        - **pre_query**: The pre-query (artist)
        """
        if not isinstance(data.pre_query, str)\
                or not isinstance(data.query, str):
            return JSONResponse(status_code=500, content={
                'err': True,
                'errorText': 'Invalid request',
            })
        pre_query: str = data.pre_query
        query: str = data.query
        typeahead_result: list[dict] = await self.cache_utils.check_typeahead(query, pre_query)
        typeahead_list: list[str] = [str(r['song']) for r in typeahead_result]
        return JSONResponse(content=typeahead_list)

    async def lyric_search_handler(self, data: ValidLyricRequest) -> JSONResponse:
        """
        Search for lyrics
        - **a**: artist
        - **s**: song
        - **t**: track (artist and song combined) [used only if a & s are not used]
        - **extra**: include extra details in response [optional, default: false]
        - **lrc**: Request LRCs?
        - **sub**: text to search within lyrics, if found lyrics will begin at found verse [optional, default: none]
        - **src**: the script/utility which initiated the request
        - **excluded_sources**: sources to exclude [optional, default: none]
        """
        # Need either (a and s) or t, and always src.
        if (not data.a or not data.s) and not data.t or not data.src:
            raise HTTPException(detail="Invalid request", status_code=500)

        if data.src.upper() not in self.acceptable_request_sources:
            await self.notifier.send(f"ERROR @ {__file__.rsplit('/', maxsplit=1)[-1]}",
                                     f"Unknown request source: {data.src}")
            return JSONResponse(status_code=500, content={
                'err': True,
                'errorText': f'Unknown request source: {data.src}',
            })

        if not data.t:
            search_artist: Optional[str] = data.a
            search_song: Optional[str] = data.s
        else:
            # A track without " - " yields (None, None) and is rejected by
            # the isinstance check below rather than raising ValueError.
            (search_artist, search_song) = self._split_track(data.t)

        if search_artist and search_song:
            # Trim, collapse whitespace runs, then URL-decode both parts.
            search_artist = str(self.constants.DOUBLE_SPACE_REGEX.sub(" ", search_artist.strip()))
            search_song = str(self.constants.DOUBLE_SPACE_REGEX.sub(" ", search_song.strip()))
            search_artist = urllib.parse.unquote(search_artist)
            search_song = urllib.parse.unquote(search_song)

        if not isinstance(search_artist, str) or not isinstance(search_song, str):
            return JSONResponse(status_code=500, content={
                'err': True,
                'errorText': 'Invalid request',
            })

        excluded_sources: Optional[list] = data.excluded_sources
        aggregate_search = aggregate.Aggregate(exclude_methods=excluded_sources)
        plain_lyrics: bool = not data.lrc
        result: Optional[Union[LyricsResult, dict]] = await aggregate_search.search(search_artist, search_song, plain_lyrics)
        if not result:
            return JSONResponse(content={
                'err': True,
                'errorText': 'Sources exhausted, lyrics not located.',
            })

        # Work on a shallow copy: vars() hands back the object's live
        # __dict__, and the rewrites/pops below must not alter the result
        # object itself. A plain dict result is accepted as-is (copied).
        if isinstance(result, dict):
            response: dict = dict(result)
        else:
            response = dict(vars(result))

        if data.sub and not data.lrc:
            lyric_lines: list[str] = response['lyrics'].strip().split(" / ")
            seeked_found_line: Optional[int] = self._find_seek_line(lyric_lines, data.sub)

            # Compare against None explicitly: index 0 (a match on the very
            # first line) is a valid hit even though it is falsy.
            if seeked_found_line is None:
                return JSONResponse(status_code=500, content={
                    'err': True,
                    'errorText': 'Seek (a.k.a. subsearch) failed.',
                    'failed_seek': True,
                })
            response['lyrics'] = " / ".join(lyric_lines[seeked_found_line:])

        response['confidence'] = int(response['confidence'])
        response['time'] = f"{float(response['time']):.4f}"

        if plain_lyrics:
            # Line separators (" / " or newline) become HTML line breaks.
            response['lyrics'] = regex.sub(r'(\s/\s|\n)', '<br>', response['lyrics']).strip()
        else:
            # Swap lyrics key for 'lrc'
            response['lrc'] = response.pop('lyrics')

        if "cache" in response['src']:
            response['from_cache'] = True

        if not data.extra:
            response.pop('src')

        return JSONResponse(content=response)