# api/endpoints/lyric_search.py

#!/usr/bin/env python3.12
# pylint: disable=bare-except, broad-exception-raised, broad-exception-caught

import importlib
import traceback
import logging
import os
import urllib.parse
from typing import Optional
import regex
import aiohttp
import aiosqlite as sqlite3
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from lyric_search_new.sources import aggregate


class ValidLyricRequest(BaseModel):
    """
    Request body accepted by the lyric search endpoints.

    - **a**: artist
    - **s**: song
    - **t**: track (artist and song combined) [used only if a & s are not used]
    - **extra**: include extra details in response [optional, default: false]
    - **lrc**: request LRC (time-tagged) lyrics instead of plain lyrics [optional, default: false]
    - **sub**: text to search within lyrics, if found lyrics will begin at found verse [optional]
    - **src**: the script/utility which initiated the request [required]
    - **excluded_sources**: sources to exclude (new only)
    """

    # Search terms: either a + s together, or the combined t.
    a: str | None = None
    s: str | None = None
    t: str | None = None
    # Optional text to seek to within the located lyrics.
    sub: str | None = None
    # Response-shaping flags.
    extra: bool | None = False
    lrc: bool | None = False
    # Identifier of the requesting client; the only field without a default.
    src: str
    excluded_sources: list | None = None

    class Config:  # pylint: disable=missing-class-docstring, too-few-public-methods
        schema_extra = {
            "example": {
                "a": "eminem",
                "s": "rap god",
                "src": "WEB",
                "extra": True,
                "lrc": False,
            },
        }

class ValidTypeAheadRequest(BaseModel):
    """
    Request body for the typeahead endpoints.

    - **pre_query**: artist text used to narrow a song lookup [optional here, but the song typeahead handler rejects requests without it]
    - **query**: query string
    """
    pre_query: str|None = None
    query: str


class ValidLyricSearchLogRequest(BaseModel):
    """
    Request body for the lyric search history endpoint.

    - **webradio**: whether or not to include requests generated automatically by the radio page on codey.lol, defaults to False
    """

    webradio: bool = False


class CacheUtils:
    """Lyrics Cache DB Utils"""
    def __init__(self):
        self.lyrics_db_path = os.path.join("/", "var", "lib",
                                        "singerdbs", "cached_lyrics.db")

    async def check_typeahead(self, s: str, pre_query: str | None = None):
        """Check s against artists stored - for typeahead"""
        async with sqlite3.connect(self.lyrics_db_path,
                                   timeout=2) as db_conn:
            db_conn.row_factory = lambda c, r: dict([(col[0], r[idx]) for idx, col in enumerate(c.description)])
            if not pre_query:
                query = "SELECT distinct(artist) FROM lyrics WHERE artist LIKE ? LIMIT 15"
                query_params = (f"%{s}%",)
            else:
                query = "SELECT distinct(song) FROM lyrics WHERE artist LIKE ? AND song LIKE ? LIMIT 15"
                query_params = (f"%{pre_query}%", f"%{s}%",)
            async with db_conn.execute(query, query_params) as db_cursor:
                return await db_cursor.fetchall()


class LyricSearch(FastAPI):
    """Lyric Search Endpoint"""
    def __init__(self, app: FastAPI, util, constants, glob_state): # pylint: disable=super-init-not-called
        self.app = app
        self.util = util
        self.constants = constants
        self.glob_state = glob_state
        self.cache_utils = CacheUtils()
        self.lyrics_engine = importlib.import_module("lyrics_engine").LyricsEngine()

        self.endpoint_name = "lyric_search"
        self.endpoint2_name = "lyric_cache_list"

        self.endpoints = {
            "typeahead/artist": self.artist_typeahead_handler,
            "typeahead/song": self.song_typeahead_handler,
            # "lyric_search": self.lyric_search_handler,
            "lyric_search": self.new_test, #test
            "lyric_cache_list": self.lyric_cache_list_handler,
            "lyric_search_history": self.lyric_search_log_handler,
            "lyric_search_test": self.new_test,
        }

        self.acceptable_request_sources = [
            "WEB",
            "WEB-RADIO",
            "IRC-MS",
            "IRC-FS",
            "IRC-KALI",
            "DISC-ACES",
            "DISC-HAVOC",
            "IRC-SHARED"
        ]

        self.lrc_regex = regex.compile(r'\[([0-9]{2}:[0-9]{2})\.[0-9]{1,3}\](\s(.*)){0,}')

        for endpoint, handler in self.endpoints.items():
            app.add_api_route(f"/{endpoint}/", handler, methods=["POST"])

    async def lyric_cache_list_handler(self):
        """
        Get currently cached lyrics entries
        """
        return {
            'err': False,
            'data': await self.lyrics_engine.listCacheEntries()
        }

    async def artist_typeahead_handler(self, data: ValidTypeAheadRequest):
        """Artist Type  Ahead Handler"""
        if not isinstance(data.query, str) or len(data.query) < 2:
            return {
                'err': True,
                'errorText': 'Invalid request',
            }
        query = data.query
        typeahead_result = await self.cache_utils.check_typeahead(query)
        typeahead_list = [str(r.get('artist')) for r in typeahead_result]
        return typeahead_list

    async def song_typeahead_handler(self, data: ValidTypeAheadRequest):
        """Song Type Ahead Handler"""
        if not isinstance(data.pre_query, str) or len(data.pre_query) < 2\
            or not isinstance(data.query, str) or len(data.query) < 2:
            return {
                'err': True,
                'errorText': 'Invalid request',
            }
        pre_query = data.pre_query
        query = data.query
        typeahead_result = await self.cache_utils.check_typeahead(query, pre_query)
        typeahead_list = [str(r.get('song')) for r in typeahead_result]
        return typeahead_list

    async def lyric_search_log_handler(self, data: ValidLyricSearchLogRequest):
        """Lyric Search Log Handler"""
        include_radio = data.webradio
        await self.glob_state.increment_counter('lyrichistory_requests')
        last_10k_sings = await self.lyrics_engine.getHistory(limit=10000, webradio=include_radio)
        return {
            'err': False,
            'history': last_10k_sings
        }

    async def new_test(self, data: ValidLyricRequest):
        """
        Search for lyrics (testing)

        - **a**: artist
        - **s**: song
        - **t**: track (artist and song combined) [used only if a & s are not used] [unused]
        - **extra**: include extra details in response [optional, default: false] [unused]
        - **lrc**: Request LRCs?
        - **sub**: text to search within lyrics, if found lyrics will begin at found verse [optional, default: none]
        - **src**: the script/utility which initiated the request
        - **excluded_sources**: sources to exclude
        """

        if not data.a or not data.s or not data.src:
            raise HTTPException(detail="Invalid request", status_code=500)

        if data.src.upper() not in self.acceptable_request_sources:
            raise HTTPException(detail="Invalid request", status_code=500)

        search_artist: Optional[str] = data.a
        search_song: Optional[str] = data.s


        if search_artist and search_song:
            search_artist = self.constants.DOUBLE_SPACE_REGEX.sub(" ", search_artist.strip())
            search_song = self.constants.DOUBLE_SPACE_REGEX.sub(" ", search_song.strip())
            search_artist = urllib.parse.unquote(search_artist)
            search_song = urllib.parse.unquote(search_song)

        excluded_sources = data.excluded_sources
        aggregate_search = aggregate.Aggregate(exclude_methods=excluded_sources)
        plain_lyrics = not data.lrc
        result = await aggregate_search.search(search_artist, search_song, plain_lyrics)

        if not result:
            return {
                'err': True,
                'errorText': 'Sources exhausted, lyrics not located.',
            }

        result = result.todict()

        if data.sub and not data.lrc:
            seeked_found_line = None
            lyric_lines = result['lyrics'].strip().split(" / ")
            for i, line in enumerate(lyric_lines):
                line = regex.sub(r'\u2064', '', line.strip())
                if data.sub.strip().lower() in line.strip().lower():
                    seeked_found_line = i
                    logging.debug("Found %s at %s, match for %s!",
                                  line, seeked_found_line, data.sub) # REMOVEME: DEBUG
                    break

            if not seeked_found_line:
                return {
                            'failed_seek': True,
                    }
            result['lyrics'] = " / ".join(lyric_lines[seeked_found_line:])

        result['confidence'] = int(result.get('confidence', 0))
        result['time'] = f'{float(result['time']):.4f}'

        if plain_lyrics:
            result['lyrics'] = regex.sub(r'(\s/\s|\n)', '<br>', result['lyrics']).strip()
        else:
            # Swap lyrics key for 'lrc'
            result['lrc'] = result['lyrics']
            result.pop('lyrics')

        if "cache" in result['src']:
            result['from_cache'] = True

        """
        REMOVE BELOW AFTER TESTING IS DONE
        """

        # if not data.extra:
        #     result.pop('src')
        return result


    async def lyric_search_handler(self, data: ValidLyricRequest):
        """
        Search for lyrics

        - **a**: artist
        - **s**: song
        - **t**: track (artist and song combined) [used only if a & s are not used]
        - **extra**: include extra details in response [optional, default: false]
        - **lrc**: Request LRCs?
        - **sub**: text to search within lyrics, if found lyrics will begin at found verse [optional, default: none]
        - **src**: the script/utility which initiated the request
        """

        lrc = data.lrc
        src = data.src.upper()
        if not src in self.acceptable_request_sources:
            raise HTTPException(detail="Invalid request source", status_code=403)

        await self.glob_state.increment_counter('lyric_requests')

        search_artist = data.a
        search_song = data.s
        search_text = data.t
        add_extras = data.extra
        sub_search = data.sub
        search_object = None

        random_song_requested = (search_artist == "!" and search_song == "!")
        query_valid = (
            not(search_artist is None) and
        not(search_song is None) and
        len(search_artist) >= 1 and
        len(search_song) >= 1 and
        len(search_artist) + len(search_song) >= 3
        )

        if not random_song_requested and (not search_text and not query_valid):
            return {
                "err": True,
                "errorText": "Invalid parameters"
                }
        if search_artist and search_song:
            search_artist = self.constants.DOUBLE_SPACE_REGEX.sub(" ", search_artist.strip())
            search_song = self.constants.DOUBLE_SPACE_REGEX.sub(" ", search_song.strip())
            search_artist = urllib.parse.unquote(search_artist)
            search_song = urllib.parse.unquote(search_song)

        if search_text is None:
            # pylint: disable=consider-using-f-string
            search_object = self.lyrics_engine.create_query_object("%s : %s" % (search_artist, search_song))
            if sub_search:
                sub_search = regex.sub(r'\s{2,}', ' ', sub_search.strip())
                search_object = self.lyrics_engine.create_query_object("%s : %s : %s" % (search_artist, search_song, sub_search))
        else:
            search_object = self.lyrics_engine.create_query_object(str(search_text))

        if lrc:
            search_worker = await self.lyrics_engine.grabFromSpotify(searching=search_object,
                                                                     lrc=True)

            spotify_lyrics_unsynced = True
            if search_worker and search_worker.get('l'):
                for line in search_worker.get('l'):
                    if line.get('timeTag') and line.get('timeTag') != "00:00.00":
                        spotify_lyrics_unsynced = False
            if not search_worker or spotify_lyrics_unsynced:
                # Try LRCLib before failing out
                try:
                    lrclib_api_url = "https://lrclib.net/api/get"
                    sane_artist = urllib.parse.quote_plus(search_artist)
                    sane_track = urllib.parse.quote_plus(search_song)
                    async with aiohttp.ClientSession() as session:
                        async with session.get(f"{lrclib_api_url}?artist_name={sane_artist}&track_name={sane_track}") as request:
                            request.raise_for_status()
                            response_json = await request.json()
                            if not "syncedLyrics" in response_json:
                                raise BaseException("LRCLib Fallback Failed")
                            lrc_content = response_json.get('syncedLyrics')
                            returned_artist = response_json.get('artistName')
                            returned_song = response_json.get('trackName')
                            logging.debug("Synced Lyrics [LRCLib]: %s",
                                          lrc_content)
                            lrc_content_out = []
                            for line in lrc_content.split("\n"):
                                _timetag = None
                                _words = None
                                if not line.strip():
                                    continue
                                reg_helper = regex.findall(self.lrc_regex, line.strip())
                                if not reg_helper:
                                    continue
                                reg_helper = reg_helper[0]
                                _timetag = reg_helper[0]
                                if not reg_helper[1].strip():
                                    _words = "♪"
                                else:
                                    _words = reg_helper[1]
                                lrc_content_out.append({
                                    "timeTag": _timetag,
                                    "words": _words,
                                })

                            return {
                                'err': False,
                                'artist': returned_artist,
                                'song': returned_song,
                                'combo_lev': "N/A",
                                'lrc': lrc_content_out,
                                'from_cache': False,
                                'src': 'Alt LRC SRC',
                                'reqn': await self.glob_state.get_counter('lyric_requests'),
                            }
                except:
                    traceback.print_exc()
                    return {
                        'err': True,
                        'errorText': 'Search failed!',
                    }


                return {
                    'err': True,
                    'errorText': 'Search failed!',
                }
        if lrc:
            return {
                'err': False,
                'artist': search_worker['artist'],
                'song': search_worker['song'],
                'combo_lev': search_worker['combo_lev'],
                'lrc': search_worker['l'],
                'from_cache': False,
                'src': search_worker['method'],
                'reqn': await self.glob_state.get_counter('lyric_requests'),
            }

        search_worker = await self.lyrics_engine.lyrics_worker(searching=search_object)


        if not search_worker or not 'l' in search_worker.keys():
            await self.glob_state.increment_counter('failedlyric_requests')
            return {
                'err': True,
                'errorText': 'Sources exhausted, lyrics not located.'
            }


        await self.lyrics_engine.storeHistEntry(artist=search_worker.get('artist'),
                                                song=search_worker.get('song'),
                                                retr_method=search_worker.get('method'),
                                                request_src=src.strip())

        return {
            'err': False,
            'artist': search_worker['artist'],
            'song': search_worker['song'],
            'combo_lev': f'{search_worker['combo_lev']:.2f}',
            'lyrics': regex.sub(r"\s/\s", "<br>", " ".join(search_worker['l'])),
            'from_cache': search_worker['method'].strip().lower().startswith("local cache"),
            'src': search_worker['method'] if add_extras else None,
            'reqn': await self.glob_state.get_counter('lyric_requests')
        }