cleanup

2025-02-15 21:09:33 -05:00
parent 60416c493f
commit 39d1ddaffa
22 changed files with 509 additions and 525 deletions
--- a/lyric_search/utils.py
+++ b/lyric_search/utils.py
@@ -1,9 +1,10 @@
 #!/usr/bin/env python3.12

 from difflib import SequenceMatcher
-from typing import List, Optional, Tuple
+from typing import List, Optional, Union, Any
 import logging
 import regex
+from regex import Pattern

 class TrackMatcher:
    """Track Matcher"""
@@ -17,7 +18,7 @@ class TrackMatcher:
        """
        self.threshold = threshold

-    def find_best_match(self, input_track: str, candidate_tracks: List[tuple[int|str, str]]) -> Optional[Tuple[str, float]]:
+    def find_best_match(self, input_track: str, candidate_tracks: List[tuple[int|str, str]]) -> Optional[tuple]:
        """
        Find the best matching track from the candidate list.
        
@@ -26,7 +27,7 @@ class TrackMatcher:
            candidate_tracks (List[tuple[int|str, str]]): List of candidate tracks
            
        Returns:
-            Optional[Tuple[int, str, float]]: Tuple of (best matching track, similarity score)
+            Optional[tuple]: Tuple of (best matching candidate, similarity score as a rounded percentage)
                                       or None if no good match found
        """

@@ -38,7 +39,7 @@ class TrackMatcher:
        input_track = self._normalize_string(input_track)
        
        best_match = None
-        best_score = 0
+        best_score: float = 0.0

        for candidate in candidate_tracks:
            normalized_candidate = self._normalize_string(candidate[1])
@@ -56,7 +57,10 @@ class TrackMatcher:
                best_match = candidate

        # Return the match only if it meets the threshold
-        return (best_match, round(best_score * 100)) if best_score >= self.threshold else None
+        if best_score < self.threshold:  # below the threshold: no acceptable match
+            return None
+        match: tuple = (best_match, round(best_score * 100))  # (candidate, score as percent)
+        return match

    def _normalize_string(self, text: str) -> str:
        """
@@ -98,10 +102,14 @@ class DataUtils:
    Data Utils
    """

-    def __init__(self):
+    def __init__(self) -> None:
        self.lrc_regex = regex.compile(r'\[([0-9]{2}:[0-9]{2})\.[0-9]{1,3}\](\s(.*)){0,}')
-
-
+        self.scrub_regex_1: Pattern = regex.compile(r'(\[.*?\])(\s){0,}(\:){0,1}')  # [bracketed] tag, trailing whitespace, optional colon
+        self.scrub_regex_2: Pattern = regex.compile(r'(\d?)(Embed\b)',
+                                                    flags=regex.IGNORECASE)  # optional digit + "Embed", any case
+        self.scrub_regex_3: Pattern = regex.compile(r'\n{2}')  # two consecutive newlines
+        self.scrub_regex_4: Pattern = regex.compile(r'[0-9]\b$')  # single digit at the end of the text
+        
    def scrub_lyrics(self, lyrics: str) -> str:
        """
        Lyric Scrub Regex Chain
@@ -110,10 +118,10 @@ class DataUtils:
        Returns:
            str: Regex scrubbed lyrics
        """
-        lyrics = regex.sub(r'(\[.*?\])(\s){0,}(\:){0,1}', '', lyrics)
-        lyrics = regex.sub(r'(\d?)(Embed\b)', '', lyrics, flags=regex.IGNORECASE)
-        lyrics = regex.sub(r'\n{2}', '\n', lyrics)  # Gaps between verses
-        lyrics = regex.sub(r'[0-9]\b$', '', lyrics)
+        lyrics = self.scrub_regex_1.sub('', lyrics)
+        lyrics = self.scrub_regex_2.sub('', lyrics)  # flags are given at compile time; .sub() takes none
+        lyrics = self.scrub_regex_3.sub('\n', lyrics)  # Gaps between verses
+        lyrics = self.scrub_regex_4.sub('', lyrics)  # Trailing digit at end of text
        return lyrics   

    def create_lrc_object(self, lrc_str: str) -> list[dict]: