Docstring cleanup: convert docstrings to Args/Returns format

This commit is contained in:
2025-01-19 07:01:07 -05:00
parent 151643c5dc
commit be0ef08f3d
9 changed files with 118 additions and 52 deletions

View File

@@ -62,6 +62,10 @@ class TrackMatcher:
"""
Normalize string for comparison by removing special characters,
extra spaces, and converting to lowercase.
Args:
text (str): The text to normalize
Returns:
str: Normalized text
"""
# Remove special characters and convert to lowercase
text = regex.sub(r'[^\w\s-]', '', text).lower()
@@ -72,6 +76,11 @@ class TrackMatcher:
def _calculate_token_similarity(self, str1: str, str2: str) -> float:
"""
Calculate similarity based on matching tokens (words).
Args:
str1 (str): string 1 to compare
str2 (str): string 2 to compare
Returns:
float: The token similarity score
"""
tokens1 = set(str1.split())
tokens2 = set(str2.split())
@@ -94,8 +103,12 @@ class DataUtils:
def scrub_lyrics(self, lyrics: str) -> str:
"""Regex Chain
@lyrics: The lyrics (str) to scrub
"""
Lyric Scrub Regex Chain
Args:
lyrics (str): The lyrics to scrub
Returns:
str: Regex scrubbed lyrics
"""
lyrics = regex.sub(r'(\[.*?\])(\s){0,}(\:){0,1}', '', lyrics)
lyrics = regex.sub(r'(\d?)(Embed\b)', '', lyrics, flags=regex.IGNORECASE)
@@ -104,8 +117,12 @@ class DataUtils:
return lyrics
def create_lrc_object(self, lrc_str: str) -> list[dict]:
"""Create LRC Object
@lrc_str: The raw LRCLib syncedLyrics (str)
"""
Create LRC Object
Args:
lrc_str (str): The raw LRCLib syncedLyrics
Returns:
list[dict]: LRC Object comprised of timestamps/lyrics
"""
lrc_out: list = []
for line in lrc_str.split("\n"):
@@ -128,6 +145,4 @@ class DataUtils:
"timeTag": _timetag,
"words": _words,
})
logging.info("util: returning %s, type: %s",
lrc_out, type(lrc_out))
return lrc_out