WIP

2025-01-12 20:19:48 -05:00
parent efe933a185
commit 725e463992
7 changed files with 155 additions and 0 deletions
--- a/lyric_search_new/init.py
+++ b/lyric_search_new/init.py
--- a/lyric_search_new/sources/init.py
+++ b/lyric_search_new/sources/init.py
@@ -0,0 +1,4 @@
 from . import cache
 from . import genius
 from . import spotify
 from . import common
--- a/lyric_search_new/sources/cache.py
+++ b/lyric_search_new/sources/cache.py
@@ -0,0 +1,8 @@
 #!/usr/bin/env python3.12
 class Cache:
    """Cache Search Module"""

    def __init__(self) -> None:
        """Create the cache source; no instance state is set up here."""
        return None
--- a/lyric_search_new/sources/common.py
+++ b/lyric_search_new/sources/common.py
@@ -0,0 +1,5 @@
 #!/usr/bin/env python3.12
 # Request headers for scraping; the User-Agent presents as desktop
 # Firefox 130 on Linux and any content type is accepted.
 SCRAPE_HEADERS = {
    'accept': '*/*',
    'User-Agent': (
        'Mozilla/5.0 (X11; Linux x86_64; rv:130.0) '
        'Gecko/20100101 Firefox/130.0'
    ),
    }
--- a/lyric_search_new/sources/genius.py
+++ b/lyric_search_new/sources/genius.py
@@ -0,0 +1,25 @@
 #!/usr/bin/env python3.12
 from .. import private
 from . import common
 from aiohttp import ClientTimeout, ClientSession, ClientError
 class Genius:
    """Genius Search Module"""

    def __init__(self):
        """Set up the URLs, request headers and timeouts used for searching."""
        base_url = private.genius_url
        self.genius_url = base_url
        # The endpoint path is appended with no separator, so the configured
        # base URL presumably ends with "/" -- TODO confirm against `private`.
        self.genius_search_url = f'{base_url}api/search/song?q='
        # NOTE(review): this is the shared module-level dict, not a copy.
        self.headers = common.SCRAPE_HEADERS
        # 2 s to establish a connection, 2.5 s per socket read.
        self.timeout = ClientTimeout(connect=2, sock_read=2.5)

    async def search(self, artist: str, song: str):
        """
        Search for a song by artist and title.

        Not implemented yet: the method body consists of this docstring only.

        Args:
            artist (str): the artist to search
            song (str): the song to search
        """
--- a/lyric_search_new/sources/spotify.py
+++ b/lyric_search_new/sources/spotify.py
--- a/lyric_search_new/utils.py
+++ b/lyric_search_new/utils.py
@@ -0,0 +1,113 @@
 #!/usr/bin/env python3.12
 from difflib import SequenceMatcher
 from typing import List, Optional, Tuple
 import re
 class TrackMatcher:
    """Fuzzy matcher for track names written as "ARTIST - SONG".

    Each candidate is scored against the input with the maximum of a
    character-level ``SequenceMatcher`` ratio and a token (word) Jaccard
    similarity; the best candidate is returned if it reaches ``threshold``.
    """

    def __init__(self, threshold: float = 0.85):
        """
        Initialize the TrackMatcher with a similarity threshold.

        Args:
            threshold (float): Minimum similarity score to consider a match valid
                             (between 0 and 1, default 0.85)
        """
        self.threshold = threshold

    def find_best_match(self, input_track: str, candidate_tracks: List[str]) -> Optional[Tuple[str, float]]:
        """
        Find the best matching track from the candidate list.

        Args:
            input_track (str): Input track in "ARTIST - SONG" format
            candidate_tracks (List[str]): List of candidate tracks in same format

        Returns:
            Optional[Tuple[str, float]]: Tuple of (best matching track, similarity score)
                                       or None if no good match found. The returned
                                       track is the original (un-normalized) candidate.
        """
        if not input_track or not candidate_tracks:
            return None

        normalized_input = self._normalize_string(input_track)
        if not normalized_input:
            # Whitespace-only input: two empty strings would compare as a
            # perfect match, so refuse to match at all.
            return None

        best_match = None
        best_score = 0.0
        for candidate in candidate_tracks:
            normalized_candidate = self._normalize_string(candidate)
            if not normalized_candidate:
                # Nothing left to compare against.
                continue

            if normalized_input == normalized_candidate:
                final_score = 1.0
            else:
                # Take the maximum of the different scoring methods.
                final_score = max(
                    SequenceMatcher(None, normalized_input, normalized_candidate).ratio(),
                    self._calculate_token_similarity(normalized_input, normalized_candidate),
                )

            # Strict ">" keeps the earliest candidate on ties.
            if final_score > best_score:
                best_score = final_score
                best_match = candidate

        # A candidate must actually have been selected: with a threshold of 0
        # and no similarity at all there is no match to return.
        if best_match is None or best_score < self.threshold:
            return None
        return (best_match, best_score)

    def _normalize_string(self, text: str) -> str:
        """
        Normalize string for comparison by removing special characters,
        extra spaces, and converting to lowercase.

        If removing special characters leaves no word character at all (for
        example a title made only of punctuation), the punctuation is kept so
        that distinct symbol-only names do not all collapse to the same text.
        """
        lowered = text.lower()
        # Keep word characters, whitespace and hyphens; collapse whitespace runs.
        stripped = ' '.join(re.sub(r'[^\w\s-]', '', lowered).split())
        if re.search(r'\w', stripped):
            return stripped
        return ' '.join(lowered.split())

    def _calculate_token_similarity(self, str1: str, str2: str) -> float:
        """
        Calculate similarity based on matching tokens (words).

        Returns the Jaccard index of the two whitespace-separated token sets
        (size of intersection divided by size of union), or 0.0 if either
        string has no tokens.
        """
        tokens1 = set(str1.split())
        tokens2 = set(str2.split())
        if not tokens1 or not tokens2:
            return 0.0
        return len(tokens1 & tokens2) / len(tokens1 | tokens2)


 def _demo() -> None:
    """Print the best match for a few sample tracks."""
    matcher = TrackMatcher(threshold=0.85)
    candidate_tracks = [
        "The Beatles - Hey Jude",
        "Led Zeppelin - Stairway to Heaven",
        "Queen - Bohemian Rhapsody",
        "Pink Floyd - Comfortably Numb",
        "The Beatles - Hey Jules",  # Intentionally similar to "Hey Jude"
    ]
    # Test cases
    test_tracks = [
        "The Beatles - Hey Jude",  # Exact match
        "Beatles - Hey Jude",      # Similar match
        "The Beatles - Hey Jules", # Similar but different
        "Metallica - Nothing Else Matters",  # No match
        "Queen - bohemian rhapsody",  # Different case
    ]
    for test_track in test_tracks:
        result = matcher.find_best_match(test_track, candidate_tracks)
        if result:
            match, score = result
            print(f"Input: {test_track}")
            print(f"Best match: {match}")
            print(f"Similarity score: {score:.3f}\n")
        else:
            print(f"No good match found for: {test_track}\n")


 # Example usage. The guard sits after the class definition so that
 # TrackMatcher exists when the file is run as a script.
 if __name__ == "__main__":
    _demo()