This commit is contained in:
2025-02-15 21:09:33 -05:00
parent 60416c493f
commit 39d1ddaffa
22 changed files with 509 additions and 525 deletions

View File

@ -1,9 +1,10 @@
#!/usr/bin/env python3.12
from difflib import SequenceMatcher
from typing import List, Optional, Tuple
from typing import List, Optional, Union, Any
import logging
import regex
from regex import Pattern
class TrackMatcher:
"""Track Matcher"""
@ -17,7 +18,7 @@ class TrackMatcher:
"""
self.threshold = threshold
def find_best_match(self, input_track: str, candidate_tracks: List[tuple[int|str, str]]) -> Optional[Tuple[str, float]]:
def find_best_match(self, input_track: str, candidate_tracks: List[tuple[int|str, str]]) -> Optional[tuple]:
"""
Find the best matching track from the candidate list.
@ -26,7 +27,7 @@ class TrackMatcher:
candidate_tracks (List[tuple[int|str, str]]): List of candidate tracks
Returns:
Optional[Tuple[int, str, float]]: Tuple of (best matching track, similarity score)
Optional[tuple[int, str, float]]: Tuple of (best matching track, similarity score)
or None if no good match found
"""
@ -38,7 +39,7 @@ class TrackMatcher:
input_track = self._normalize_string(input_track)
best_match = None
best_score = 0
best_score: float = 0.0
for candidate in candidate_tracks:
normalized_candidate = self._normalize_string(candidate[1])
@ -56,7 +57,10 @@ class TrackMatcher:
best_match = candidate
# Return the match only if it meets the threshold
return (best_match, round(best_score * 100)) if best_score >= self.threshold else None
if best_score >= self.threshold:
return None
match: tuple = (best_match, round(best_score * 100))
return match
def _normalize_string(self, text: str) -> str:
"""
@ -98,10 +102,14 @@ class DataUtils:
Data Utils
"""
def __init__(self) -> None:
    """
    Precompile the regex patterns used by this class.
    """
    # Matches a "[mm:ss.xxx]" style timestamp (1-3 fractional digits),
    # optionally followed by whitespace and the rest of the line.
    self.lrc_regex: Pattern = regex.compile(r'\[([0-9]{2}:[0-9]{2})\.[0-9]{1,3}\](\s(.*)){0,}')
    # Bracketed tag, any trailing whitespace, and an optional colon.
    self.scrub_regex_1: Pattern = regex.compile(r'(\[.*?\])(\s){0,}(\:){0,1}')
    # Optional single digit followed by the word "Embed" (any letter case).
    self.scrub_regex_2: Pattern = regex.compile(r'(\d?)(Embed\b)',
                                                flags=regex.IGNORECASE)
    # Two consecutive newlines.
    self.scrub_regex_3: Pattern = regex.compile(r'\n{2}')
    # A single digit sitting at the very end of the text.
    self.scrub_regex_4: Pattern = regex.compile(r'[0-9]\b$')
def scrub_lyrics(self, lyrics: str) -> str:
    """
    Lyric Scrub Regex Chain
    Args:
        lyrics (str): The lyrics to scrub
    Returns:
        str: Regex scrubbed lyrics
    """
    # Strip bracketed tags together with trailing whitespace / colon
    lyrics = self.scrub_regex_1.sub('', lyrics)
    # Case-insensitivity is a compile-time property of the pattern; a
    # compiled pattern's .sub() does not accept a flags argument.
    lyrics = self.scrub_regex_2.sub('', lyrics)
    lyrics = self.scrub_regex_3.sub('\n', lyrics)  # Gaps between verses
    # Drop a stray digit left at the very end of the text
    lyrics = self.scrub_regex_4.sub('', lyrics)
    return lyrics
def create_lrc_object(self, lrc_str: str) -> list[dict]: