lrc tweaks

This commit is contained in:
2025-01-17 07:48:29 -05:00
parent 76182e3df1
commit 7899fc2d71
5 changed files with 66 additions and 13 deletions

View File

@@ -8,10 +8,10 @@ class LyricsResult:
artist: str
song: str
src: str
lyrics: str
lyrics: str|dict
confidence: int
time: float = 0.00
def dict(self):
def todict(self):
"""Return as dict"""
return {k: str(v) for k, v in asdict(self).items()}
return {k: type(v)(v) for k, v in asdict(self).items()}

View File

@@ -36,7 +36,7 @@ class Aggregate:
search_result: Optional[LyricsResult] = None
for source in sources:
if source.label.lower() in self.exclude_methods:
if source.label.lower() != "cache":
if source.label.lower() != "cache" or not plain:
logging.info("Skipping source: %s, excluded.", source.label)
continue
logging.info("Cache exclude requested, ignoring")

View File

@@ -41,6 +41,8 @@ class LRCLib:
artist: str = artist.strip().lower()
song: str = song.strip().lower()
time_start: float = time.time()
lrc_obj: Optional[list[dict]] = None
logging.info("Searching %s - %s on %s",
artist, song, self.label)
@@ -67,9 +69,16 @@ class LRCLib:
if not isinstance(search_data, list):
raise InvalidResponseException("Invalid JSON.")
possible_matches = [(x, f"{result.get('artistName')} - {result.get('trackName')}")
if plain:
possible_matches = [(x, f"{result.get('artistName')} - {result.get('trackName')}")
for x, result in enumerate(search_data)]
else:
logging.info("Limiting possible matches to only those with non-null syncedLyrics")
possible_matches = [(x, f"{result.get('artistName')} - {result.get('trackName')}")
for x, result in enumerate(search_data) if isinstance(result['syncedLyrics'], str)]
best_match = self.matcher.find_best_match(input_track,
possible_matches)[0]
if not best_match:
@@ -93,6 +102,7 @@ class LRCLib:
if not isinstance(search_data[best_match_id]['syncedLyrics'], str):
raise InvalidResponseException(f"Invalid JSON: Cannot find syncedLyrics key.\n{search_data}")
returned_lyrics: str = search_data[best_match_id]['syncedLyrics']
lrc_obj = self.datautils.create_lrc_object(returned_lyrics)
returned_track: str = f"{returned_artist} - {returned_song}"
(_matched, confidence) = self.matcher.find_best_match(input_track=input_track,
candidate_tracks=[(0, returned_track)])
@@ -104,12 +114,11 @@ class LRCLib:
matched = LyricsResult(artist=returned_artist,
song=returned_song,
src=self.label,
lyrics=returned_lyrics,
lyrics=returned_lyrics if plain else lrc_obj,
confidence=confidence,
time=time_diff)
await self.cache.store(matched)
return matched
except:
if log_level == "DEBUG":
traceback.print_exc()
traceback.print_exc()
return

View File

@@ -88,10 +88,42 @@ class DataUtils:
"""
Data Utils
"""
def __init__(self):
    """Compile the pattern used to pick apart timestamped LRC lines."""
    # Group 1 is the "mm:ss" portion of a "[mm:ss.xx]" tag (the fractional
    # part is matched but not captured); group 2 is the whitespace-led
    # remainder of the line and group 3 the text after that first
    # whitespace character.
    lrc_line_pattern = r'\[([0-9]{2}:[0-9]{2})\.[0-9]{1,3}\](\s(.*)){0,}'
    self.lrc_regex = regex.compile(lrc_line_pattern)
def scrub_lyrics(self, lyrics: str) -> str:
    """Run the lyrics text through a fixed sequence of regex clean-ups.

    The substitutions are applied in order, each on the output of the
    previous one, and the final text is returned.
    """
    substitutions = (
        # Bracketed tags, plus any whitespace and one optional colon after them
        (r'(\[.*?\])(\s){0,}(\:){0,1}', '', 0),
        # The word "Embed" (any case) with an optional digit directly before it
        (r'(\d?)(Embed\b)', '', regex.IGNORECASE),
        # Gaps between verses
        (r'\n{2}', '\n', 0),
        # A single digit sitting at the very end of the text
        (r'[0-9]\b$', '', 0),
    )
    cleaned = lyrics
    for pattern, replacement, flags in substitutions:
        cleaned = regex.sub(pattern, replacement, cleaned, flags=flags)
    return cleaned
def create_lrc_object(self, lrc_str: str) -> list[dict]:
    """Convert raw LRC text into a list of ``{"timeTag", "words"}`` dicts.

    Each non-blank line is matched against ``self.lrc_regex``; lines
    without a recognisable time tag are skipped. Only the first match on
    a line is used.

    Args:
        lrc_str: LRC-formatted lyrics, one timestamped line per row.

    Returns:
        One dict per matched line, in input order. ``timeTag`` is the
        first captured group of the pattern and ``words`` is the text
        captured after the tag, with surrounding whitespace removed
        (an empty string when the line carries a tag only).
    """
    lrc_out: list[dict] = []
    for raw_line in lrc_str.split("\n"):
        stripped = raw_line.strip()
        if not stripped:
            continue
        matches = regex.findall(self.lrc_regex, stripped)
        if not matches:
            continue
        reg_helper = matches[0]
        logging.debug("Reg helper: %s for line: %s; len: %s",
                      reg_helper, raw_line, len(reg_helper))
        lrc_out.append({
            "timeTag": reg_helper[0],
            # The second group still holds the whitespace that separates
            # the tag from the lyric text; strip it so callers do not
            # receive words with a stray leading space.
            "words": reg_helper[1].strip(),
        })
    logging.info("util: returning %s, type: %s",
                 lrc_out, type(lrc_out))
    return lrc_out