From 7899fc2d710ac0187ba323634c1fc9f698cc1b78 Mon Sep 17 00:00:00 2001
From: codey <codey@code.ee>
Date: Fri, 17 Jan 2025 07:48:29 -0500
Subject: [PATCH] lrc tweaks

---
 endpoints/lyric_search.py             | 20 ++++++++++++----
 lyric_search_new/constructors.py      |  6 ++---
 lyric_search_new/sources/aggregate.py |  2 +-
 lyric_search_new/sources/lrclib.py    | 17 ++++++++++----
 lyric_search_new/utils.py             | 34 ++++++++++++++++++++++++++-
 5 files changed, 66 insertions(+), 13 deletions(-)
diff --git a/endpoints/lyric_search.py b/endpoints/lyric_search.py
index 2a45503..7c7463f 100644
--- a/endpoints/lyric_search.py
+++ b/endpoints/lyric_search.py
@@ -191,14 +191,17 @@ class LyricSearch(FastAPI):
         aggregate_search = aggregate.Aggregate(exclude_methods=excluded_sources)
         plain_lyrics = not data.lrc
         result = await aggregate_search.search(data.a, data.s, plain_lyrics)
+        
         if not result:
             return {
                 'err': True,
                 'errorText': 'Sources exhausted, lyrics not located.',
             }
-        result = result.dict()
-        lyric_lines = result['lyrics'].strip().split(" / ")
-        if data.sub:
+        
+        result = result.todict()
+
+        if data.sub and not data.lrc:
+            lyric_lines = result['lyrics'].strip().split(" / ")        
             for i, line in enumerate(lyric_lines):
                     line = regex.sub(r'\u2064', '', line.strip())
                     if data.sub.strip().lower() in line.strip().lower():
@@ -213,9 +216,18 @@ class LyricSearch(FastAPI):
                     }
             result['lyrics'] = " / ".join(lyric_lines[seeked_found_line:])
         
-        result['lyrics'] = regex.sub(r'(\s/\s|\n)', '<br>', result['lyrics']).strip()
         result['confidence'] = int(result.get('confidence', 0))
         result['time'] = f'{float(result['time']):.4f}'
+        
+        if plain_lyrics:
+            result['lyrics'] = regex.sub(r'(\s/\s|\n)', '<br>', result['lyrics']).strip()
+        else:
+            # Swap lyrics key for 'lrc'
+            logging.info("lyrics: %s, type: %s",
+                         result['lyrics'], type(result['lyrics']))
+            result['lrc'] = result['lyrics']
+            result.pop('lyrics')
+
         if "cached" in result['src']:
             result['from_cache'] = True
         
diff --git a/lyric_search_new/constructors.py b/lyric_search_new/constructors.py
index a95c5cc..fdc82d3 100644
--- a/lyric_search_new/constructors.py
+++ b/lyric_search_new/constructors.py
@@ -8,10 +8,10 @@ class LyricsResult:
     artist: str
     song: str
     src: str
-    lyrics: str
+    lyrics: str|dict
     confidence: int
     time: float = 0.00
 
-    def dict(self):
+    def todict(self):
         """Return as dict"""
-        return {k: str(v) for k, v in asdict(self).items()}    
\ No newline at end of file
+        return asdict(self)  # asdict() already copies every field; re-wrapping with type(v)(v) only added a TypeError on None
\ No newline at end of file
diff --git a/lyric_search_new/sources/aggregate.py b/lyric_search_new/sources/aggregate.py
index b20b338..2f2ec00 100644
--- a/lyric_search_new/sources/aggregate.py
+++ b/lyric_search_new/sources/aggregate.py
@@ -36,7 +36,7 @@ class Aggregate:
         search_result: Optional[LyricsResult] = None
         for source in sources:
             if source.label.lower() in self.exclude_methods:
-                if source.label.lower() != "cache":
+                if source.label.lower() != "cache" or not plain:
                     logging.info("Skipping source: %s, excluded.", source.label)
                     continue
                 logging.info("Cache exclude requested, ignoring")
diff --git a/lyric_search_new/sources/lrclib.py b/lyric_search_new/sources/lrclib.py
index 7e0f99d..0d57848 100644
--- a/lyric_search_new/sources/lrclib.py
+++ b/lyric_search_new/sources/lrclib.py
@@ -41,6 +41,8 @@ class LRCLib:
             artist: str = artist.strip().lower()
             song: str = song.strip().lower()
             time_start: float = time.time()
+            lrc_obj: Optional[list[dict]] = None
+
             logging.info("Searching %s - %s on %s",
                          artist, song, self.label) 
                 
@@ -67,9 +69,16 @@ class LRCLib:
                     if not isinstance(search_data, list):
                         raise InvalidResponseException("Invalid JSON.")
                     
-                    possible_matches = [(x, f"{result.get('artistName')} - {result.get('trackName')}")
+                    if plain:
+                        possible_matches = [(x, f"{result.get('artistName')} - {result.get('trackName')}")
                                          for x, result in enumerate(search_data)]
+                    else:
+                      logging.info("Limiting possible matches to only those with non-null syncedLyrics")
+                      possible_matches = [(x, f"{result.get('artistName')} - {result.get('trackName')}")
+                                         for x, result in enumerate(search_data) if isinstance(result['syncedLyrics'], str)]                        
+
                     
+
                     best_match = self.matcher.find_best_match(input_track,
                                                                  possible_matches)[0]
                     if not best_match:
@@ -93,6 +102,7 @@ class LRCLib:
                         if not isinstance(search_data[best_match_id]['syncedLyrics'], str):
                             raise InvalidResponseException(f"Invalid JSON: Cannot find syncedLyrics key.\n{search_data}")
                         returned_lyrics: str = search_data[best_match_id]['syncedLyrics']
+                        lrc_obj = self.datautils.create_lrc_object(returned_lyrics)
                     returned_track: str = f"{returned_artist} - {returned_song}"
                     (_matched, confidence) = self.matcher.find_best_match(input_track=input_track,
                                                                         candidate_tracks=[(0, returned_track)])
@@ -104,12 +114,11 @@ class LRCLib:
                     matched = LyricsResult(artist=returned_artist,
                                         song=returned_song,
                                         src=self.label,
-                                        lyrics=returned_lyrics,
+                                        lyrics=returned_lyrics if plain else lrc_obj,
                                         confidence=confidence,
                                         time=time_diff)
                     await self.cache.store(matched)
                     return matched
         except:
-            if log_level == "DEBUG":
-                traceback.print_exc()
+            traceback.print_exc()
             return
\ No newline at end of file
diff --git a/lyric_search_new/utils.py b/lyric_search_new/utils.py
index 4b6d4a4..19518f5 100644
--- a/lyric_search_new/utils.py
+++ b/lyric_search_new/utils.py
@@ -88,10 +88,42 @@ class DataUtils:
     """
     Data Utils
     """
+
+    def __init__(self):
+        self.lrc_regex = regex.compile(r'\[([0-9]{2}:[0-9]{2})\.[0-9]{1,3}\](\s(.*)){0,}')  # groups: 1 = mm:ss (fraction not captured), 2 = whitespace + text, 3 = text
+
+
     def scrub_lyrics(self, lyrics: str) -> str:
         """Regex Chain"""
         lyrics = regex.sub(r'(\[.*?\])(\s){0,}(\:){0,1}', '', lyrics)
         lyrics = regex.sub(r'(\d?)(Embed\b)', '', lyrics, flags=regex.IGNORECASE)
         lyrics = regex.sub(r'\n{2}', '\n', lyrics)  # Gaps between verses
         lyrics = regex.sub(r'[0-9]\b$', '', lyrics)
-        return lyrics        
+        return lyrics   
+
+    def create_lrc_object(self, lrc_str: str) -> list[dict]:
+        """Parse LRC text into a list of {"timeTag": ..., "words": ...} dicts.
+
+        Lines without a [mm:ss.fff]-style tag are skipped. "timeTag" keeps only
+        the mm:ss part; a tagged line with no text gets "♪" as its "words".
+        """
+        lrc_out: list[dict] = []
+        for line in lrc_str.split("\n"):
+            if not line.strip():
+                continue
+            reg_helper = regex.findall(self.lrc_regex, line.strip())
+            if not reg_helper:
+                continue
+            # First match only: (mm:ss, whitespace + text, text).
+            reg_helper = reg_helper[0]
+            logging.debug("Reg helper: %s for line: %s; len: %s",
+                            reg_helper, line, len(reg_helper))
+            # Drop the separator whitespace captured along with the text.
+            _words = reg_helper[1].strip()
+            lrc_out.append({"timeTag": reg_helper[0],
+                            "words": _words or "♪"})
+        # Log the result once, after the loop, instead of once per line.
+        logging.info("util: returning %s, type: %s",
+                     lrc_out, type(lrc_out))
+        return lrc_out
+