cleanup
This commit is contained in:
@ -1,9 +1,10 @@
|
||||
#!/usr/bin/env python3.12
|
||||
|
||||
from difflib import SequenceMatcher
|
||||
from typing import List, Optional, Tuple
|
||||
from typing import List, Optional, Union, Any
|
||||
import logging
|
||||
import regex
|
||||
from regex import Pattern
|
||||
|
||||
class TrackMatcher:
|
||||
"""Track Matcher"""
|
||||
@ -17,7 +18,7 @@ class TrackMatcher:
|
||||
"""
|
||||
self.threshold = threshold
|
||||
|
||||
def find_best_match(self, input_track: str, candidate_tracks: List[tuple[int|str, str]]) -> Optional[Tuple[str, float]]:
|
||||
def find_best_match(self, input_track: str, candidate_tracks: List[tuple[int|str, str]]) -> Optional[tuple]:
|
||||
"""
|
||||
Find the best matching track from the candidate list.
|
||||
|
||||
@ -26,7 +27,7 @@ class TrackMatcher:
|
||||
candidate_tracks (List[tuple[int|str, str]]): List of candidate tracks
|
||||
|
||||
Returns:
|
||||
Optional[Tuple[int, str, float]]: Tuple of (best matching track, similarity score)
|
||||
Optional[tuple]: Tuple of (best matching candidate as an (id, track) tuple, similarity score as a 0-100 integer)
|
||||
or None if no good match found
|
||||
"""
|
||||
|
||||
@ -38,7 +39,7 @@ class TrackMatcher:
|
||||
input_track = self._normalize_string(input_track)
|
||||
|
||||
best_match = None
|
||||
best_score = 0
|
||||
best_score: float = 0.0
|
||||
|
||||
for candidate in candidate_tracks:
|
||||
normalized_candidate = self._normalize_string(candidate[1])
|
||||
@ -56,7 +57,10 @@ class TrackMatcher:
|
||||
best_match = candidate
|
||||
|
||||
# Return the match only if it meets the threshold
|
||||
return (best_match, round(best_score * 100)) if best_score >= self.threshold else None
|
||||
if best_score >= self.threshold:
|
||||
return None
|
||||
match: tuple = (best_match, round(best_score * 100))
|
||||
return match
|
||||
|
||||
def _normalize_string(self, text: str) -> str:
|
||||
"""
|
||||
@ -98,10 +102,14 @@ class DataUtils:
|
||||
Data Utils
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
    """
    Precompile the regular expressions used by this helper.

    Compiling once here means the patterns are not rebuilt on every call
    that uses them.
    """
    # A bracketed "[mm:ss.fff]" timestamp (1-3 fractional digits) followed by
    # zero or more whitespace-prefixed text groups.
    self.lrc_regex = regex.compile(r'\[([0-9]{2}:[0-9]{2})\.[0-9]{1,3}\](\s(.*)){0,}')

    # A bracketed tag, any trailing whitespace, and an optional colon.
    self.scrub_regex_1: Pattern = regex.compile(r'(\[.*?\])(\s){0,}(\:){0,1}')
    # An optional single digit followed by the word "Embed", any letter case.
    # The flag constant is regex.IGNORECASE; a misspelled attribute name would
    # raise AttributeError as soon as the class is instantiated.
    self.scrub_regex_2: Pattern = regex.compile(r'(\d?)(Embed\b)',
                                                flags=regex.IGNORECASE)
    # Exactly two consecutive newlines.
    self.scrub_regex_3: Pattern = regex.compile(r'\n{2}')
    # A single digit at the very end of the string.
    self.scrub_regex_4: Pattern = regex.compile(r'[0-9]\b$')
|
||||
|
||||
def scrub_lyrics(self, lyrics: str) -> str:
    """
    Lyric Scrub Regex Chain

    Runs the four precompiled scrub patterns over the text, in order:
    bracketed tags, the "Embed" marker (with one optional leading digit),
    double newlines, and finally a single trailing digit.

    Args:
        lyrics (str): Lyrics text to scrub

    Returns:
        str: Regex scrubbed lyrics
    """
    lyrics = self.scrub_regex_1.sub('', lyrics)
    # Matching flags belong to the compiled pattern; a compiled pattern's
    # sub() takes no `flags` argument, so none is passed here.
    lyrics = self.scrub_regex_2.sub('', lyrics)
    lyrics = self.scrub_regex_3.sub('\n', lyrics)  # Gaps between verses
    # Last step strips a trailing digit (scrub_regex_4). Re-applying
    # scrub_regex_3 here would delete remaining blank-line pairs instead.
    lyrics = self.scrub_regex_4.sub('', lyrics)
    return lyrics
|
||||
|
||||
def create_lrc_object(self, lrc_str: str) -> list[dict]:
|
||||
|
Reference in New Issue
Block a user