From 7899fc2d710ac0187ba323634c1fc9f698cc1b78 Mon Sep 17 00:00:00 2001 From: codey Date: Fri, 17 Jan 2025 07:48:29 -0500 Subject: [PATCH] lrc tweaks --- endpoints/lyric_search.py | 20 ++++++++++++---- lyric_search_new/constructors.py | 6 ++--- lyric_search_new/sources/aggregate.py | 2 +- lyric_search_new/sources/lrclib.py | 17 ++++++++++---- lyric_search_new/utils.py | 34 ++++++++++++++++++++++++++- 5 files changed, 66 insertions(+), 13 deletions(-) diff --git a/endpoints/lyric_search.py b/endpoints/lyric_search.py index 2a45503..7c7463f 100644 --- a/endpoints/lyric_search.py +++ b/endpoints/lyric_search.py @@ -191,14 +191,17 @@ class LyricSearch(FastAPI): aggregate_search = aggregate.Aggregate(exclude_methods=excluded_sources) plain_lyrics = not data.lrc result = await aggregate_search.search(data.a, data.s, plain_lyrics) + if not result: return { 'err': True, 'errorText': 'Sources exhausted, lyrics not located.', } - result = result.dict() - lyric_lines = result['lyrics'].strip().split(" / ") - if data.sub: + + result = result.todict() + + if data.sub and not data.lrc: + lyric_lines = result['lyrics'].strip().split(" / ") for i, line in enumerate(lyric_lines): line = regex.sub(r'\u2064', '', line.strip()) if data.sub.strip().lower() in line.strip().lower(): @@ -213,9 +216,18 @@ class LyricSearch(FastAPI): } result['lyrics'] = " / ".join(lyric_lines[seeked_found_line:]) - result['lyrics'] = regex.sub(r'(\s/\s|\n)', '
', result['lyrics']).strip() result['confidence'] = int(result.get('confidence', 0)) result['time'] = f'{float(result['time']):.4f}' + + if plain_lyrics: + result['lyrics'] = regex.sub(r'(\s/\s|\n)', '
', result['lyrics']).strip() + else: + # Swap lyrics key for 'lrc' + logging.info("lyrics: %s, type: %s", + result['lyrics'], type(result['lyrics'])) + result['lrc'] = result['lyrics'] + result.pop('lyrics') + if "cached" in result['src']: result['from_cache'] = True diff --git a/lyric_search_new/constructors.py b/lyric_search_new/constructors.py index a95c5cc..fdc82d3 100644 --- a/lyric_search_new/constructors.py +++ b/lyric_search_new/constructors.py @@ -8,10 +8,10 @@ class LyricsResult: artist: str song: str src: str - lyrics: str + lyrics: str|dict confidence: int time: float = 0.00 - def dict(self): + def todict(self): """Return as dict""" - return {k: str(v) for k, v in asdict(self).items()} \ No newline at end of file + return {k: type(v)(v) for k, v in asdict(self).items()} \ No newline at end of file diff --git a/lyric_search_new/sources/aggregate.py b/lyric_search_new/sources/aggregate.py index b20b338..2f2ec00 100644 --- a/lyric_search_new/sources/aggregate.py +++ b/lyric_search_new/sources/aggregate.py @@ -36,7 +36,7 @@ class Aggregate: search_result: Optional[LyricsResult] = None for source in sources: if source.label.lower() in self.exclude_methods: - if source.label.lower() != "cache": + if source.label.lower() != "cache" or not plain: logging.info("Skipping source: %s, excluded.", source.label) continue logging.info("Cache exclude requested, ignoring") diff --git a/lyric_search_new/sources/lrclib.py b/lyric_search_new/sources/lrclib.py index 7e0f99d..0d57848 100644 --- a/lyric_search_new/sources/lrclib.py +++ b/lyric_search_new/sources/lrclib.py @@ -41,6 +41,8 @@ class LRCLib: artist: str = artist.strip().lower() song: str = song.strip().lower() time_start: float = time.time() + lrc_obj: Optional[list[dict]] = None + logging.info("Searching %s - %s on %s", artist, song, self.label) @@ -67,9 +69,16 @@ class LRCLib: if not isinstance(search_data, list): raise InvalidResponseException("Invalid JSON.") - possible_matches = [(x, f"{result.get('artistName')} - {result.get('trackName')}") + if plain: + possible_matches = [(x, f"{result.get('artistName')} - {result.get('trackName')}") for x, result in enumerate(search_data)] + else: + logging.info("Limiting possible matches to only those with non-null syncedLyrics") + possible_matches = [(x, f"{result.get('artistName')} - {result.get('trackName')}") + for x, result in enumerate(search_data) if isinstance(result['syncedLyrics'], str)] + + best_match = self.matcher.find_best_match(input_track, possible_matches)[0] if not best_match: @@ -93,6 +102,7 @@ class LRCLib: if not isinstance(search_data[best_match_id]['syncedLyrics'], str): raise InvalidResponseException(f"Invalid JSON: Cannot find syncedLyrics key.\n{search_data}") returned_lyrics: str = search_data[best_match_id]['syncedLyrics'] + lrc_obj = self.datautils.create_lrc_object(returned_lyrics) returned_track: str = f"{returned_artist} - {returned_song}" (_matched, confidence) = self.matcher.find_best_match(input_track=input_track, candidate_tracks=[(0, returned_track)]) @@ -104,12 +114,11 @@ class LRCLib: matched = LyricsResult(artist=returned_artist, song=returned_song, src=self.label, - lyrics=returned_lyrics, + lyrics=returned_lyrics if plain else lrc_obj, confidence=confidence, time=time_diff) await self.cache.store(matched) return matched except: - if log_level == "DEBUG": - traceback.print_exc() + traceback.print_exc() return \ No newline at end of file diff --git a/lyric_search_new/utils.py b/lyric_search_new/utils.py index 4b6d4a4..19518f5 100644 --- a/lyric_search_new/utils.py +++ b/lyric_search_new/utils.py @@ -88,10 +88,42 @@ class DataUtils: """ Data Utils """ + + def __init__(self): + self.lrc_regex = regex.compile(r'\[([0-9]{2}:[0-9]{2})\.[0-9]{1,3}\](\s(.*)){0,}') + + def scrub_lyrics(self, lyrics: str) -> str: """Regex Chain""" lyrics = regex.sub(r'(\[.*?\])(\s){0,}(\:){0,1}', '', lyrics) lyrics = regex.sub(r'(\d?)(Embed\b)', '', lyrics, flags=regex.IGNORECASE) lyrics = regex.sub(r'\n{2}', '\n', lyrics) # Gaps between verses lyrics = regex.sub(r'[0-9]\b$', '', lyrics) - return lyrics + return lyrics + + def create_lrc_object(self, lrc_str: str) -> list[dict]: + lrc_out: list = [] + for line in lrc_str.split("\n"): + _timetag = None + _words = None + if not line.strip(): + continue + reg_helper = regex.findall(self.lrc_regex, line.strip()) + if not reg_helper: + continue + reg_helper = reg_helper[0] + logging.debug("Reg helper: %s for line: %s; len: %s", + reg_helper, line, len(reg_helper)) + _timetag = reg_helper[0] + if not reg_helper[1].strip(): + _words = "♪" + else: + _words = reg_helper[1] + lrc_out.append({ + "timeTag": _timetag, + "words": _words, + }) + logging.info("util: returning %s, type: %s", + lrc_out, type(lrc_out)) + return lrc_out +