# api/endpoints/lyric_search.py

#!/usr/bin/env python3.12

import logging
import os
import urllib.parse
import regex
import aiosqlite as sqlite3
from fastapi import FastAPI, HTTPException
from fastapi.responses import JSONResponse
from typing import LiteralString, Optional, Union
from regex import Pattern
from .constructors import ValidTypeAheadRequest, ValidLyricRequest
from lyric_search.constructors import LyricsResult
from lyric_search.sources import aggregate 
from lyric_search import notifier

class CacheUtils:
    """
    Lyrics Cache DB Utils

    Read-only helpers that query the cached lyrics SQLite database
    (table `lyrics`, columns `artist` and `song`) for typeahead suggestions.
    """
    def __init__(self) -> None:
        # Location of the SQLite database holding the cached lyrics
        self.lyrics_db_path: LiteralString = os.path.join("/usr/local/share",
                                        "sqlite_dbs", "cached_lyrics.db")

    @staticmethod
    def _escape_like(term: str) -> str:
        """
        Escape the LIKE wildcards (percent sign, underscore) and the escape
        character itself (backslash) in term, so that user-typed text is
        matched literally by a LIKE pattern using a backslash ESCAPE clause
        """
        # The backslash must be doubled first, otherwise the backslashes
        # added for the wildcards would themselves be escaped again.
        return (term.replace("\\", "\\\\")
                .replace("%", "\\%")
                .replace("_", "\\_"))

    async def check_typeahead(self, s: str,
                              pre_query: Optional[str] = None) -> list[dict]:
        """
        Check s against the cache - for typeahead
        - **s**: the (partial) text typed so far
        - **pre_query**: artist to restrict to [optional]; when given,
          s is matched against song titles of matching artists, otherwise
          s is matched against artist names

        Returns at most 15 distinct rows, keyed by 'song' when pre_query
        is given and by 'artist' otherwise.
        """
        async with sqlite3.connect(self.lyrics_db_path,
                                   timeout=2) as db_conn:
            # Rows are returned as sqlite3.Row so callers can index by column name
            db_conn.row_factory = sqlite3.Row
            needle: str = f"%{self._escape_like(s)}%"
            if not pre_query:
                query: str = ("SELECT distinct(artist) FROM lyrics "
                              "WHERE artist LIKE ? ESCAPE '\\' LIMIT 15")
                query_params: tuple = (needle,)
            else:
                query = ("SELECT distinct(song) FROM lyrics "
                         "WHERE artist LIKE ? ESCAPE '\\' "
                         "AND song LIKE ? ESCAPE '\\' LIMIT 15")
                query_params = (f"%{self._escape_like(pre_query)}%", needle,)
            async with await db_conn.execute(query, query_params) as db_cursor:
                return await db_cursor.fetchall()


class LyricSearch(FastAPI):
    """
    Lyric Search Endpoint

    Registers the typeahead and lyric search POST routes on the supplied
    app and implements their handlers.
    """
    def __init__(self, app: FastAPI,
                 util, constants) -> None:
        """
        Store collaborators and register this endpoint's routes on app
        - **app**: application the routes are added to
        - **util**: shared utility object (stored only; unused in this class)
        - **constants**: shared constants; DOUBLE_SPACE_REGEX is read by
          lyric_search_handler
        """
        # NOTE(review): FastAPI.__init__ is never invoked for this instance;
        # routes are registered on the `app` argument instead. Confirm the
        # FastAPI base class is actually required.
        self.app: FastAPI = app
        self.util = util
        self.constants = constants
        self.cache_utils = CacheUtils()
        self.notifier = notifier.DiscordNotifier()

        # Route path (without the leading slash) -> handler
        self.endpoints: dict = {
            "typeahead/artist": self.artist_typeahead_handler,
            "typeahead/song": self.song_typeahead_handler,
            "lyric_search": self.lyric_search_handler, # Preserving old endpoint path temporarily
            "lyric/search": self.lyric_search_handler,
        }

        # Values of `src` (compared upper-cased) permitted to use lyric search
        self.acceptable_request_sources: list = [
            "WEB",
            "WEB-RADIO",
            "DISC-HAVOC",
            "LIMNORIA-SHARED",
            "IRC-SHARED",
        ]

        # Matches an LRC line: "[mm:ss.xxx]" optionally followed by text.
        # Compiled here but not referenced by any handler in this class.
        self.lrc_regex: Pattern = regex.compile(r'\[([0-9]{2}:[0-9]{2})\.[0-9]{1,3}\](\s(.*)){0,}')

        for endpoint, handler in self.endpoints.items():
            # Only the current lyric search path is exposed in the API schema
            _schema_include = endpoint in ["lyric/search"]
            app.add_api_route(f"/{endpoint}", handler, methods=["POST"], include_in_schema=_schema_include)

    @staticmethod
    def _invalid_request() -> JSONResponse:
        """
        Build the generic 'Invalid request' error response
        """
        return JSONResponse(status_code=500, content={
            'err': True,
            'errorText': 'Invalid request',
        })

    @staticmethod
    def _find_seek_line(lyric_lines: list[str], sub: str) -> Optional[int]:
        """
        Return the index of the first line containing sub
        (case-insensitive), or None when no line matches
        """
        needle: str = sub.strip().lower()
        for i, line in enumerate(lyric_lines):
            # U+2064 (invisible plus) is removed before comparing
            line = line.strip().replace('\u2064', '')
            if needle in line.strip().lower():
                logging.debug("Found %s at %s, match for %s!",
                              line, i, sub)
                return i
        return None

    async def artist_typeahead_handler(self, data: ValidTypeAheadRequest) -> JSONResponse:
        """
        Artist Type Ahead Handler
        - **query**: The query
        """
        # Queries shorter than two characters are rejected
        if not isinstance(data.query, str) or len(data.query) < 2:
            return self._invalid_request()
        query: str = data.query
        typeahead_result: list[dict] = await self.cache_utils.check_typeahead(query)
        typeahead_list: list[str] = [str(r['artist']) for r in typeahead_result]
        return JSONResponse(content=typeahead_list)

    async def song_typeahead_handler(self, data: ValidTypeAheadRequest) -> JSONResponse:
        """
        Song Type Ahead Handler
        - **query**: The query
        - **pre_query**: The pre-query (artist)
        """
        if not isinstance(data.pre_query, str)\
            or not isinstance(data.query, str):
            return self._invalid_request()
        pre_query: str = data.pre_query
        query: str = data.query
        typeahead_result: list[dict] = await self.cache_utils.check_typeahead(query, pre_query)
        typeahead_list: list[str] = [str(r['song']) for r in typeahead_result]
        return JSONResponse(content=typeahead_list)

    async def lyric_search_handler(self, data: ValidLyricRequest) -> JSONResponse:
        """
        Search for lyrics
        - **a**: artist
        - **s**: song
        - **t**: track (artist and song combined) [used only if a & s are not used]
        - **extra**: include extra details in response [optional, default: false]
        - **lrc**: Request LRCs?
        - **sub**: text to search within lyrics, if found lyrics will begin at found verse [optional, default: none]
        - **src**: the script/utility which initiated the request
        - **excluded_sources**: sources to exclude [optional, default: none]
        """
        # Either (a and s) or t must be supplied, and src is always required
        if ((not data.a or not data.s) and not data.t) or not data.src:
            raise HTTPException(detail="Invalid request", status_code=500)

        if data.src.upper() not in self.acceptable_request_sources:
            this_file: str = __file__.rsplit("/", maxsplit=1)[-1]
            await self.notifier.send(f"ERROR @ {this_file}",
                                     f"Unknown request source: {data.src}")
            return JSONResponse(status_code=500, content={
                'err': True,
                'errorText': f'Unknown request source: {data.src}',
            })

        search_artist: Optional[str]
        search_song: Optional[str]
        if not data.t:
            search_artist, search_song = data.a, data.s
        else:
            # t must look like "<artist> - <song>"; without the separator
            # there is nothing to unpack, so reject instead of raising.
            t_split: list[str] = data.t.split(" - ", maxsplit=1)
            if len(t_split) != 2:
                return self._invalid_request()
            search_artist, search_song = t_split

        if not isinstance(search_artist, str) or not isinstance(search_song, str):
            return self._invalid_request()

        if search_artist and search_song:
            # Collapse repeated spaces, then decode any percent-encoding
            search_artist = str(self.constants.DOUBLE_SPACE_REGEX.sub(" ", search_artist.strip()))
            search_song = str(self.constants.DOUBLE_SPACE_REGEX.sub(" ", search_song.strip()))
            search_artist = urllib.parse.unquote(search_artist)
            search_song = urllib.parse.unquote(search_song)

        excluded_sources: Optional[list] = data.excluded_sources
        aggregate_search = aggregate.Aggregate(exclude_methods=excluded_sources)
        plain_lyrics: bool = not data.lrc
        result: Optional[Union[LyricsResult, dict]] = await aggregate_search.search(search_artist, search_song, plain_lyrics)

        if not result:
            return JSONResponse(content={
                'err': True,
                'errorText': 'Sources exhausted, lyrics not located.',
            })

        # Work on a shallow copy: vars() hands back the object's own
        # __dict__, so popping/reassigning keys on it would alter the
        # LyricsResult instance itself.
        payload: dict = dict(result) if isinstance(result, dict) else dict(vars(result))

        if data.sub and not data.lrc:
            lyric_lines: list[str] = payload['lyrics'].strip().split(" / ")
            seeked_found_line: Optional[int] = self._find_seek_line(lyric_lines, data.sub)

            # Compare against None: index 0 (match on the first line) is a valid hit
            if seeked_found_line is None:
                return JSONResponse(status_code=500, content={
                            'err': True,
                            'errorText': 'Seek (a.k.a. subsearch) failed.',
                            'failed_seek': True,
                    })
            payload['lyrics'] = " / ".join(lyric_lines[seeked_found_line:])

        payload['confidence'] = int(payload['confidence'])
        payload['time'] = f"{float(payload['time']):.4f}"

        if plain_lyrics:
            # Line separators (" / " or newline) become HTML line breaks
            payload['lyrics'] = regex.sub(r'(\s/\s|\n)', '<br>', payload['lyrics']).strip()
        else:
            # Swap lyrics key for 'lrc'
            payload['lrc'] = payload.pop('lyrics')

        if "cache" in payload['src']:
            payload['from_cache'] = True

        # The source is only disclosed when extra details were requested
        if not data.extra:
            payload.pop('src')

        return JSONResponse(content=payload)
test 2024-08-10 22:49:00 -04:00			`#!/usr/bin/env python3.12`

cleanup 2025-01-11 20:59:10 -05:00			`import logging`
typeahead 2025-01-14 20:22:12 -05:00			`import os`
test 2024-08-10 22:49:00 -04:00			`import urllib.parse`
			`import regex`
typeahead 2025-01-14 20:22:12 -05:00			`import aiosqlite as sqlite3`
cleanup 2024-08-11 13:49:07 -04:00			`from fastapi import FastAPI, HTTPException`
cleanup 2025-02-15 21:09:33 -05:00			`from fastapi.responses import JSONResponse`
			`from typing import LiteralString, Optional, Union`
cleanup 2025-02-14 16:07:24 -05:00			`from regex import Pattern`
refactoring 2025-02-11 11:19:52 -05:00			`from .constructors import ValidTypeAheadRequest, ValidLyricRequest`
significant refactor/cleanup 2025-02-11 20:01:07 -05:00			`from lyric_search.constructors import LyricsResult`
lyric_search_new renamed to lyric_search 2025-01-24 09:10:54 -05:00			`from lyric_search.sources import aggregate`
			`from lyric_search import notifier`
test 2024-08-10 22:49:00 -04:00
typeahead 2025-01-14 20:22:12 -05:00			`class CacheUtils:`
cleanup 2025-02-15 21:09:33 -05:00			`"""`
			`Lyrics Cache DB Utils`
			`"""`
cleanup 2025-02-14 16:07:24 -05:00			`def __init__(self) -> None:`
			`self.lyrics_db_path: LiteralString = os.path.join("/usr/local/share",`
requests_async 2025-01-24 19:26:07 -05:00			`"sqlite_dbs", "cached_lyrics.db")`
typeahead 2025-01-14 20:22:12 -05:00
cleanup 2025-02-15 21:09:33 -05:00			`async def check_typeahead(self, s: str,`
			`pre_query: Optional[str] = None) -> list[dict]:`
			`"""`
			`Check s against artists stored - for typeahead`
			`"""`
typeahead 2025-01-14 20:22:12 -05:00			`async with sqlite3.connect(self.lyrics_db_path,`
			`timeout=2) as db_conn:`
cleanup 2025-02-14 16:07:24 -05:00			`db_conn.row_factory = sqlite3.Row`
typeahead 2025-01-14 20:22:12 -05:00			`if not pre_query:`
significant refactor/cleanup 2025-02-11 20:01:07 -05:00			`query: str = "SELECT distinct(artist) FROM lyrics WHERE artist LIKE ? LIMIT 15"`
			`query_params: tuple = (f"%{s}%",)`
typeahead 2025-01-14 20:22:12 -05:00			`else:`
cleanup 2025-02-14 16:07:24 -05:00			`query = "SELECT distinct(song) FROM lyrics WHERE artist LIKE ? AND song LIKE ? LIMIT 15"`
			`query_params = (f"%{pre_query}%", f"%{s}%",)`
various 2025-01-23 13:02:03 -05:00			`async with await db_conn.execute(query, query_params) as db_cursor:`
typeahead 2025-01-14 20:22:12 -05:00			`return await db_cursor.fetchall()`


test 2024-08-10 22:49:00 -04:00			`class LyricSearch(FastAPI):`
cleanup 2025-02-15 21:09:33 -05:00			`"""`
			`Lyric Search Endpoint`
			`"""`
			`def __init__(self, app: FastAPI,`
cleanup/rm pylint ignores 2025-02-16 08:50:53 -05:00			`util, constants) -> None:`
cleanup 2025-02-15 21:09:33 -05:00			`self.app: FastAPI = app`
test 2024-08-10 22:49:00 -04:00			`self.util = util`
			`self.constants = constants`
typeahead 2025-01-14 20:22:12 -05:00			`self.cache_utils = CacheUtils()`
add notifier to lyric_search for unknown request src, add timeout for widget/radio endpoint 2025-01-22 19:49:46 -05:00			`self.notifier = notifier.DiscordNotifier()`
test 2024-08-10 22:49:00 -04:00
cleaner 2024-08-13 10:36:53 -04:00
significant refactor/cleanup 2025-02-11 20:01:07 -05:00			`self.endpoints: dict = {`
typeahead 2025-01-14 20:22:12 -05:00			`"typeahead/artist": self.artist_typeahead_handler,`
			`"typeahead/song": self.song_typeahead_handler,`
misc/version bump 2025-02-05 20:23:06 -05:00			`"lyric_search": self.lyric_search_handler, # Preserving old endpoint path temporarily`
			`"lyric/search": self.lyric_search_handler,`
cleaner 2024-08-13 10:36:53 -04:00			`}`

significant refactor/cleanup 2025-02-11 20:01:07 -05:00			`self.acceptable_request_sources: list = [`
test 2024-08-10 22:49:00 -04:00			`"WEB",`
add eligible request src - WEB-RADIO 2024-08-17 06:01:18 -04:00			`"WEB-RADIO",`
test 2024-08-10 22:49:00 -04:00			`"DISC-HAVOC",`
stoof 2025-01-20 05:47:09 -05:00			`"LIMNORIA-SHARED",`
cull lyric_search acceptable request sources 2025-02-18 13:33:29 -05:00			`"IRC-SHARED",`
test 2024-08-10 22:49:00 -04:00			`]`

significant refactor/cleanup 2025-02-11 20:01:07 -05:00			`self.lrc_regex: Pattern = regex.compile(r'\[([0-9]{2}:[0-9]{2})\.[0-9]{1,3}\](\s(.*)){0,}')`
stoof 2024-11-29 15:33:12 -05:00
cleaner 2024-08-13 10:36:53 -04:00			`for endpoint, handler in self.endpoints.items():`
misc/version bump 2025-02-05 20:23:06 -05:00			`_schema_include = endpoint in ["lyric/search"]`
beep boop 2025-01-29 16:03:33 -05:00			`app.add_api_route(f"/{endpoint}", handler, methods=["POST"], include_in_schema=_schema_include)`
Resolves #14 2024-08-13 19:50:02 -04:00
cleanup 2025-02-15 21:09:33 -05:00			`async def artist_typeahead_handler(self, data: ValidTypeAheadRequest) -> JSONResponse:`
			`"""`
			`Artist Type Ahead Handler`
pydantic docstrings 2025-02-16 08:17:27 -05:00			`- query: The query`
cleanup 2025-02-15 21:09:33 -05:00			`"""`
typeahead 2025-01-14 20:22:12 -05:00			`if not isinstance(data.query, str) or len(data.query) < 2:`
cleanup 2025-02-15 21:09:33 -05:00			`return JSONResponse(status_code=500, content={`
typeahead 2025-01-14 20:22:12 -05:00			`'err': True,`
			`'errorText': 'Invalid request',`
cleanup 2025-02-15 21:09:33 -05:00			`})`
significant refactor/cleanup 2025-02-11 20:01:07 -05:00			`query: str = data.query`
cleanup 2025-02-14 16:07:24 -05:00			`typeahead_result: list[dict] = await self.cache_utils.check_typeahead(query)`
cleanup/fix artist_typeahead_handler 2025-02-16 07:49:10 -05:00			`typeahead_list: list[str] = [str(r['artist']) for r in typeahead_result]`
cleanup 2025-02-15 21:09:33 -05:00			`return JSONResponse(content=typeahead_list)`
typeahead 2025-01-14 20:22:12 -05:00
cleanup 2025-02-15 21:09:33 -05:00			`async def song_typeahead_handler(self, data: ValidTypeAheadRequest) -> JSONResponse:`
			`"""`
			`Song Type Ahead Handler`
pydantic docstrings 2025-02-16 08:17:27 -05:00			`- query: The query`
			`- pre_query: The pre-query (artist)`
cleanup 2025-02-15 21:09:33 -05:00			`"""`
stoof 2025-01-20 05:47:09 -05:00			`if not isinstance(data.pre_query, str)\`
cleanup 2025-02-15 21:09:33 -05:00			`or not isinstance(data.query, str):`
			`return JSONResponse(status_code=500, content={`
typeahead 2025-01-14 20:22:12 -05:00			`'err': True,`
			`'errorText': 'Invalid request',`
cleanup 2025-02-15 21:09:33 -05:00			`})`
significant refactor/cleanup 2025-02-11 20:01:07 -05:00			`pre_query: str = data.pre_query`
			`query: str = data.query`
cleanup 2025-02-14 16:07:24 -05:00			`typeahead_result: list[dict] = await self.cache_utils.check_typeahead(query, pre_query)`
whoopsies 2025-02-15 21:18:20 -05:00			`typeahead_list: list[str] = [str(r['song']) for r in typeahead_result]`
cleanup 2025-02-15 21:09:33 -05:00			`return JSONResponse(content=typeahead_list)`
typeahead 2025-01-14 20:22:12 -05:00
cleanup 2025-02-15 21:09:33 -05:00			`async def lyric_search_handler(self, data: ValidLyricRequest) -> JSONResponse:`
progress 2025-01-13 20:47:39 -05:00			`"""`
re-add support for lyric_search 't' param 2025-01-24 10:26:27 -05:00			`Search for lyrics`
progress 2025-01-13 20:47:39 -05:00			`- a: artist`
			`- s: song`
re-add support for lyric_search 't' param 2025-01-24 10:26:27 -05:00			`- t: track (artist and song combined) [used only if a & s are not used]`
beep boop 2025-01-29 16:03:33 -05:00			`- extra: include extra details in response [optional, default: false]`
add subsearch support to new lyric search endpoint 2025-01-17 05:52:16 -05:00			`- lrc: Request LRCs?`
			`- sub: text to search within lyrics, if found lyrics will begin at found verse [optional, default: none]`
			`- src: the script/utility which initiated the request`
re-add support for lyric_search 't' param 2025-01-24 10:26:27 -05:00			`- excluded_sources: sources to exclude [optional, default: none]`
progress 2025-01-13 20:47:39 -05:00			`"""`
re-add support for lyric_search 't' param 2025-01-24 10:26:27 -05:00			`if (not data.a or not data.s) and not data.t or not data.src:`
progress 2025-01-13 20:47:39 -05:00			`raise HTTPException(detail="Invalid request", status_code=500)`

add subsearch support to new lyric search endpoint 2025-01-17 05:52:16 -05:00			`if data.src.upper() not in self.acceptable_request_sources:`
add notifier to lyric_search for unknown request src, add timeout for widget/radio endpoint 2025-01-22 19:49:46 -05:00			`await self.notifier.send(f"ERROR @ {__file__.rsplit("/", maxsplit=1)[-1]}",`
			`f"Unknown request source: {data.src}")`
cleanup 2025-02-15 21:09:33 -05:00			`return JSONResponse(status_code=500, content={`
stoof 2025-01-20 05:47:09 -05:00			`'err': True,`
add notifier to lyric_search for unknown request src, add timeout for widget/radio endpoint 2025-01-22 19:49:46 -05:00			`'errorText': f'Unknown request source: {data.src}',`
cleanup 2025-02-15 21:09:33 -05:00			`})`
docstring stuff 2025-01-19 07:01:07 -05:00
re-add support for lyric_search 't' param 2025-01-24 10:26:27 -05:00			`if not data.t:`
cleanup 2025-02-14 16:07:24 -05:00			`search_artist: Optional[str] = data.a`
			`search_song: Optional[str] = data.s`
re-add support for lyric_search 't' param 2025-01-24 10:26:27 -05:00			`else:`
cleanup 2025-02-15 21:09:33 -05:00			`t_split: tuple = tuple(data.t.split(" - ", maxsplit=1))`
			`(search_artist, search_song) = t_split`
docstring stuff 2025-01-19 07:01:07 -05:00
			`if search_artist and search_song:`
cleanup 2025-02-14 16:07:24 -05:00			`search_artist = str(self.constants.DOUBLE_SPACE_REGEX.sub(" ", search_artist.strip()))`
			`search_song = str(self.constants.DOUBLE_SPACE_REGEX.sub(" ", search_song.strip()))`
			`search_artist = urllib.parse.unquote(search_artist)`
			`search_song = urllib.parse.unquote(search_song)`
add subsearch support to new lyric search endpoint 2025-01-17 05:52:16 -05:00
cleanup 2025-02-14 16:07:24 -05:00			`if not isinstance(search_artist, str) or not isinstance(search_song, str):`
cleanup 2025-02-15 21:09:33 -05:00			`return JSONResponse(status_code=500, content={`
cleanup 2025-02-14 16:07:24 -05:00			`'err': True,`
			`'errorText': 'Invalid request',`
cleanup 2025-02-15 21:09:33 -05:00			`})`
cleanup 2025-02-14 16:07:24 -05:00
			`excluded_sources: Optional[list] = data.excluded_sources`
changes 2025-01-14 18:37:49 -05:00			`aggregate_search = aggregate.Aggregate(exclude_methods=excluded_sources)`
significant refactor/cleanup 2025-02-11 20:01:07 -05:00			`plain_lyrics: bool = not data.lrc`
cleanup 2025-02-15 21:09:33 -05:00			`result: Optional[Union[LyricsResult, dict]] = await aggregate_search.search(search_artist, search_song, plain_lyrics)`
lrc tweaks 2025-01-17 07:48:29 -05:00
progress -- change get to search for lrclib 2025-01-14 10:04:05 -05:00			`if not result:`
cleanup 2025-02-15 21:09:33 -05:00			`return JSONResponse(content={`
progress -- change get to search for lrclib 2025-01-14 10:04:05 -05:00			`'err': True,`
			`'errorText': 'Sources exhausted, lyrics not located.',`
cleanup 2025-02-15 21:09:33 -05:00			`})`
lrc tweaks 2025-01-17 07:48:29 -05:00
cleanup 2025-02-14 16:07:24 -05:00			`result = vars(result)`

lrc tweaks 2025-01-17 07:48:29 -05:00			`if data.sub and not data.lrc:`
significant refactor/cleanup 2025-02-11 20:01:07 -05:00			`seeked_found_line: Optional[int] = None`
			`lyric_lines: list[str] = result['lyrics'].strip().split(" / ")`
add subsearch support to new lyric search endpoint 2025-01-17 05:52:16 -05:00			`for i, line in enumerate(lyric_lines):`
cleanup 2025-02-14 16:07:24 -05:00			`line = regex.sub(r'\u2064', '', line.strip())`
linter fixes 2025-01-17 07:54:17 -05:00			`if data.sub.strip().lower() in line.strip().lower():`
cleanup 2025-02-14 16:07:24 -05:00			`seeked_found_line = i`
linter fixes 2025-01-17 07:54:17 -05:00			`logging.debug("Found %s at %s, match for %s!",`
			`line, seeked_found_line, data.sub) # REMOVEME: DEBUG`
			`break`
change confidence to % 2025-01-17 06:41:56 -05:00
minor refactor 2025-01-17 05:53:05 -05:00			`if not seeked_found_line:`
cleanup 2025-02-15 21:09:33 -05:00			`return JSONResponse(status_code=500, content={`
			`'err': True,`
			`'errorText': 'Seek (a.k.a. subsearch) failed.',`
add subsearch support to new lyric search endpoint 2025-01-17 05:52:16 -05:00			`'failed_seek': True,`
cleanup 2025-02-15 21:09:33 -05:00			`})`
add subsearch support to new lyric search endpoint 2025-01-17 05:52:16 -05:00			`result['lyrics'] = " / ".join(lyric_lines[seeked_found_line:])`

cleanup/fix artist_typeahead_handler 2025-02-16 07:49:10 -05:00			`result['confidence'] = int(result['confidence'])`
resolves #22, #29 2025-01-15 20:17:49 -05:00			`result['time'] = f'{float(result['time']):.4f}'`
lrc tweaks 2025-01-17 07:48:29 -05:00
			`if plain_lyrics:`
			`result['lyrics'] = regex.sub(r'(\s/\s\|\n)', '<br>', result['lyrics']).strip()`
			`else:`
			`# Swap lyrics key for 'lrc'`
			`result['lrc'] = result['lyrics']`
			`result.pop('lyrics')`

docstring stuff 2025-01-19 07:01:07 -05:00			`if "cache" in result['src']:`
resolves #22, #29 2025-01-15 20:17:49 -05:00			`result['from_cache'] = True`

beep boop 2025-01-29 16:03:33 -05:00			`if not data.extra:`
			`result.pop('src')`

cleanup 2025-02-15 21:09:33 -05:00			`return JSONResponse(content=result)`