lrc tweaks
This commit is contained in:
@@ -88,10 +88,42 @@ class DataUtils:
|
||||
"""
|
||||
Data Utils
|
||||
"""
|
||||
|
||||
def __init__(self):
    """Compile the pattern used to recognise LRC time-tagged lines.

    Capture groups of ``self.lrc_regex``:
      1. the ``mm:ss`` part of a ``[mm:ss.xx]`` tag (the one-to-three digit
         fraction is matched but not captured);
      2. everything that follows the tag, leading whitespace included;
      3. the same text without that leading whitespace.
    """
    # The text group uses ``\s*`` (not a mandatory ``\s``) so that lyric text
    # written directly after the tag ("[00:12.34]text") is captured as well
    # as text separated by whitespace ("[00:12.34] text").  The pattern keeps
    # exactly three groups so index-based consumers keep working.
    self.lrc_regex = regex.compile(
        r'\[([0-9]{2}:[0-9]{2})\.[0-9]{1,3}\](\s*(.*))')
|
||||
|
||||
|
||||
def scrub_lyrics(self, lyrics: str) -> str:
    """Strip markup residue from raw lyrics text with a chain of regexes.

    The substitutions run in this order:
      1. remove every bracketed tag such as ``[Chorus]`` together with any
         whitespace and an optional colon that directly follow it;
      2. remove the word "Embed" (any letter case, followed by a word
         boundary) plus at most one digit directly in front of it;
      3. replace each pair of consecutive newlines with a single newline;
      4. remove one digit when it is the last character of the text
         (a single final newline after it is tolerated by ``$``).

    Args:
        lyrics: Raw lyrics text.

    Returns:
        The cleaned lyrics text.
    """
    lyrics = regex.sub(r'(\[.*?\])(\s){0,}(\:){0,1}', '', lyrics)
    # NOTE(review): this step and the last one each drop a single digit, so
    # "123Embed" leaves "1" behind - confirm whether longer counters should
    # be removed entirely.
    lyrics = regex.sub(r'(\d?)(Embed\b)', '', lyrics, flags=regex.IGNORECASE)
    lyrics = regex.sub(r'\n{2}', '\n', lyrics)  # Gaps between verses
    lyrics = regex.sub(r'[0-9]\b$', '', lyrics)
    return lyrics
|
||||
|
||||
def create_lrc_object(self, lrc_str: str) -> list[dict]:
    """Parse LRC-formatted text into a list of timed lyric entries.

    The input is split on newlines.  Blank lines and lines in which
    ``self.lrc_regex`` finds no time tag are skipped.  Every other line
    yields one dict whose ``timeTag`` is group 1 of the match and whose
    ``words`` is the text following the tag, stripped of surrounding
    whitespace.  A tag with no text after it gets "♪" as its words.

    Args:
        lrc_str: LRC text, one tagged lyric per line.

    Returns:
        A list of ``{"timeTag": ..., "words": ...}`` dicts in input order.
    """
    lrc_out: list[dict] = []
    for line in lrc_str.split("\n"):
        stripped = line.strip()
        if not stripped:
            continue
        match = self.lrc_regex.search(stripped)
        if match is None:
            continue
        logging.debug("Reg helper: %s for line: %s; len: %s",
                      match.groups(), line, len(match.groups()))
        # Take the words from the line itself (everything after the tag's
        # closing bracket) rather than from the pattern's text group, so
        # text written directly after the tag ("[00:12.34]text") is kept
        # instead of being replaced by the placeholder note.
        _, _, text = stripped[match.end(1):].partition("]")
        lrc_out.append({
            "timeTag": match.group(1),
            "words": text.strip() or "♪",
        })
    logging.info("util: returning %s, type: %s",
                 lrc_out, type(lrc_out))
    return lrc_out
|
||||
|
||||
|
||||
Reference in New Issue
Block a user