radio_util: open tracks SQLite DB in readonly mode; black: reformat files

2025-04-17 07:28:05 -04:00
parent 96add377df
commit 6c88c23a4d
25 changed files with 1913 additions and 1340 deletions
--- a/lyric_search/utils.py
+++ b/lyric_search/utils.py
@@ -4,38 +4,41 @@ import logging
 import regex
 from regex import Pattern

+
 class TrackMatcher:
    """Track Matcher"""
+
    def __init__(self, threshold: float = 0.85):
        """
        Initialize the TrackMatcher with a similarity threshold.
-        
+
        Args:
            threshold (float): Minimum similarity score to consider a match valid
                             (between 0 and 1, default 0.85)
        """
        self.threshold = threshold

-    def find_best_match(self, input_track: str, candidate_tracks: List[tuple[int|str, str]]) -> Optional[tuple]:
+    def find_best_match(
+        self, input_track: str, candidate_tracks: List[tuple[int | str, str]]
+    ) -> Optional[tuple]:
        """
        Find the best matching track from the candidate list.
-        
+
        Args:
            input_track (str): Input track in "ARTIST - SONG" format
            candidate_tracks (List[tuple[int|str, str]]): List of candidate tracks
-            
+
        Returns:
            Optional[tuple[int, str, float]]: Tuple of (best matching track, similarity score)
                                       or None if no good match found
        """

-
        if not input_track or not candidate_tracks:
            return None

        # Normalize input track
        input_track = self._normalize_string(input_track)
-        
+
        best_match = None
        best_score: float = 0.0

@@ -43,12 +46,16 @@ class TrackMatcher:
            normalized_candidate = self._normalize_string(candidate[1])
            if normalized_candidate.strip().lower() == input_track.strip().lower():
                return (candidate, 100.0)
-            
+
            # Calculate various similarity scores
            exact_score = 1.0 if input_track == normalized_candidate else 0.0
-            sequence_score = SequenceMatcher(None, input_track, normalized_candidate).ratio()
-            token_score = self._calculate_token_similarity(input_track, normalized_candidate)
-            
+            sequence_score = SequenceMatcher(
+                None, input_track, normalized_candidate
+            ).ratio()
+            token_score = self._calculate_token_similarity(
+                input_track, normalized_candidate
+            )
+
            # Take the maximum of the different scoring methods
            final_score = max(exact_score, sequence_score, token_score)

@@ -59,7 +66,7 @@ class TrackMatcher:
        # Return the match only if it meets the threshold
        if best_score < self.threshold:
            return None
-        match: tuple = (best_match, round(best_score * 100)) 
+        match: tuple = (best_match, round(best_score * 100))
        return match

    def _normalize_string(self, text: str) -> str:
@@ -72,9 +79,9 @@ class TrackMatcher:
            str: Normalized text
        """
        # Remove special characters and convert to lowercase
-        text = regex.sub(r'[^\w\s-]', '', text).lower()
+        text = regex.sub(r"[^\w\s-]", "", text).lower()
        # Normalize spaces
-        text = ' '.join(text.split())
+        text = " ".join(text.split())
        return text

    def _calculate_token_similarity(self, str1: str, str2: str) -> float:
@@ -88,28 +95,32 @@ class TrackMatcher:
        """
        tokens1 = set(str1.split())
        tokens2 = set(str2.split())
-        
+
        if not tokens1 or not tokens2:
            return 0.0

        intersection = tokens1.intersection(tokens2)
        union = tokens1.union(tokens2)
-        
+
        return len(intersection) / len(union)
-    
+
+
 class DataUtils:
    """
    Data Utils
    """

    def __init__(self) -> None:
-        self.lrc_regex = regex.compile(r'\[([0-9]{2}:[0-9]{2})\.[0-9]{1,3}\](\s(.*)){0,}')
-        self.scrub_regex_1: Pattern = regex.compile(r'(\[.*?\])(\s){0,}(\:){0,1}')
-        self.scrub_regex_2: Pattern = regex.compile(r'(\d?)(Embed\b)',
-                                                    flags=regex.IGNORECASE)
-        self.scrub_regex_3: Pattern = regex.compile(r'\n{2}')
-        self.scrub_regex_4: Pattern = regex.compile(r'[0-9]\b$')
-        
+        self.lrc_regex = regex.compile(
+            r"\[([0-9]{2}:[0-9]{2})\.[0-9]{1,3}\](\s(.*)){0,}"
+        )
+        self.scrub_regex_1: Pattern = regex.compile(r"(\[.*?\])(\s){0,}(\:){0,1}")
+        self.scrub_regex_2: Pattern = regex.compile(
+            r"(\d?)(Embed\b)", flags=regex.IGNORECASE
+        )
+        self.scrub_regex_3: Pattern = regex.compile(r"\n{2}")
+        self.scrub_regex_4: Pattern = regex.compile(r"[0-9]\b$")
+
    def scrub_lyrics(self, lyrics: str) -> str:
        """
        Lyric Scrub Regex Chain
@@ -118,11 +129,11 @@ class DataUtils:
        Returns:
            str: Regex scrubbed lyrics
        """
-        lyrics = self.scrub_regex_1.sub('', lyrics)
-        lyrics = self.scrub_regex_2.sub('', lyrics)
-        lyrics = self.scrub_regex_3.sub('\n', lyrics)  # Gaps between verses
-        lyrics = self.scrub_regex_3.sub('', lyrics)
-        return lyrics   
+        lyrics = self.scrub_regex_1.sub("", lyrics)
+        lyrics = self.scrub_regex_2.sub("", lyrics)
+        lyrics = self.scrub_regex_3.sub("\n", lyrics)  # Gaps between verses
+        lyrics = self.scrub_regex_3.sub("", lyrics)
+        return lyrics

    def create_lrc_object(self, lrc_str: str) -> list[dict]:
        """
@@ -142,15 +153,21 @@ class DataUtils:
            if not reg_helper:
                continue
            reg_helper = reg_helper[0]
-            logging.debug("Reg helper: %s for line: %s; len: %s",
-                            reg_helper, line, len(reg_helper))
+            logging.debug(
+                "Reg helper: %s for line: %s; len: %s",
+                reg_helper,
+                line,
+                len(reg_helper),
+            )
            _timetag = reg_helper[0]
            if not reg_helper[1].strip():
                _words = "♪"
            else:
                _words = reg_helper[1].strip()
-            lrc_out.append({
-                "timeTag": _timetag,
-                "words": _words,
-            })
-        return lrc_out        
+            lrc_out.append(
+                {
+                    "timeTag": _timetag,
+                    "words": _words,
+                }
+            )
+        return lrc_out