lrc tweaks

This commit is contained in:
2025-01-17 07:48:29 -05:00
parent 76182e3df1
commit 7899fc2d71
5 changed files with 66 additions and 13 deletions

View File

@@ -88,10 +88,42 @@ class DataUtils:
"""
Data Utils: helpers for scrubbing lyric text and parsing LRC (timed lyric) strings
"""
def __init__(self):
    """Pre-compile the pattern that recognises one time-tagged LRC line."""
    # Group 1: the "NN:NN" portion of the bracketed tag (the 1-3 digit
    #          fraction after the dot is matched but not captured).
    # Group 2: optional remainder of the line, starting at the whitespace
    #          character that follows the closing bracket.
    # Group 3: that remainder without its leading whitespace character.
    lrc_line_pattern = r'\[([0-9]{2}:[0-9]{2})\.[0-9]{1,3}\](\s(.*)){0,}'
    self.lrc_regex = regex.compile(lrc_line_pattern)
def scrub_lyrics(self, lyrics: str) -> str:
    """
    Scrub Lyrics

    Runs the lyric text through a fixed chain of regex substitutions and
    returns the result.

    Args:
        lyrics (str): Lyric text to clean
    Returns:
        str: The text after all substitutions have been applied
    """
    # Bracketed segments (e.g. "[Chorus]") together with any whitespace
    # that follows them and one optional trailing colon
    lyrics = regex.sub(r'(\[.*?\])(\s){0,}(\:){0,1}', '', lyrics)
    # The word "Embed" (any case), plus a single digit directly before it
    lyrics = regex.sub(r'(\d?)(Embed\b)', '', lyrics, flags=regex.IGNORECASE)
    lyrics = regex.sub(r'\n{2}', '\n', lyrics)  # Gaps between verses
    # One digit left dangling at the very end of the text (no MULTILINE
    # flag, so this only applies to the end of the whole string)
    lyrics = regex.sub(r'[0-9]\b$', '', lyrics)
    return lyrics
def create_lrc_object(self, lrc_str: str) -> list[dict]:
    """
    Create LRC Object

    Splits the input on newlines and converts every line matched by
    self.lrc_regex into a dict. Blank lines and lines without a match
    are skipped.

    Args:
        lrc_str (str): LRC text, one entry per line
    Returns:
        list[dict]: One dict per matched line, in input order, with keys
            "timeTag" (first capture group of self.lrc_regex) and
            "words" (text following the tag with surrounding whitespace
            removed; "" when the line carries no text)
    """
    lrc_out: list = []
    for line in lrc_str.split("\n"):
        stripped_line = line.strip()
        if not stripped_line:
            continue
        matches = self.lrc_regex.findall(stripped_line)
        if not matches:
            continue  # No time tag on this line
        # Only the first match on a line is used
        reg_helper = matches[0]
        logging.debug("Reg helper: %s for line: %s; len: %s",
                      reg_helper, line, len(reg_helper))
        lrc_out.append({
            "timeTag": reg_helper[0],
            # The second group begins with the whitespace separating the
            # tag from the text; strip it so callers get the bare words
            # (this yields "" when the group is empty or whitespace-only).
            "words": reg_helper[1].strip(),
        })
    logging.info("util: returning %s, type: %s",
                 lrc_out, type(lrc_out))
    return lrc_out