WIP

2025-01-12 20:19:48 -05:00
parent efe933a185
commit 725e463992
7 changed files with 155 additions and 0 deletions
--- a/lyric_search_new/init.py
+++ b/lyric_search_new/init.py
--- a/lyric_search_new/sources/init.py
+++ b/lyric_search_new/sources/init.py
@@ -0,0 +1,4 @@
+from . import cache
+from . import genius
+from . import spotify
+from . import common
--- a/lyric_search_new/sources/cache.py
+++ b/lyric_search_new/sources/cache.py
@@ -0,0 +1,8 @@
+#!/usr/bin/env python3.12
+
+class Cache:
+    """Cache Search Module (WIP placeholder: no lookup logic exists yet)."""
+
+    def __init__(self) -> None:
+        """Set up the cache source; there is currently no state to initialise."""
+
--- a/lyric_search_new/sources/common.py
+++ b/lyric_search_new/sources/common.py
@@ -0,0 +1,5 @@
+#!/usr/bin/env python3.12
+# Shared HTTP request headers for the source modules.  The User-Agent string
+# is kept in its own constant so the header mapping stays on one short line.
+_FIREFOX_UA = 'Mozilla/5.0 (X11; Linux x86_64; rv:130.0) Gecko/20100101 Firefox/130.0'
+SCRAPE_HEADERS = {'accept': '*/*', 'User-Agent': _FIREFOX_UA}
--- a/lyric_search_new/sources/genius.py
+++ b/lyric_search_new/sources/genius.py
@@ -0,0 +1,25 @@
+#!/usr/bin/env python3.12
+
+from .. import private
+from . import common
+from aiohttp import ClientTimeout, ClientSession, ClientError
+
+class Genius:
+    """Genius Search Module"""
+
+    def __init__(self):
+        """Record the Genius URLs, shared scrape headers and aiohttp timeouts."""
+        base_url = private.genius_url
+        self.genius_url = base_url
+        self.genius_search_url = f'{base_url}api/search/song?q='
+        self.headers = common.SCRAPE_HEADERS
+        # aiohttp limits: 2s to obtain a connection, 2.5s per socket read.
+        self.timeout = ClientTimeout(connect=2, sock_read=2.5)
+
+    async def search(self, artist: str, song: str):
+        """
+        Search Genius for a song (WIP: no request is made and nothing is returned yet).
+        @artist: the artist to search
+        @song: the song to search
+        """
+
--- a/lyric_search_new/sources/spotify.py
+++ b/lyric_search_new/sources/spotify.py
--- a/lyric_search_new/utils.py
+++ b/lyric_search_new/utils.py
@@ -0,0 +1,113 @@
+#!/usr/bin/env python3.12
+
+from difflib import SequenceMatcher
+from typing import List, Optional, Tuple
+import re
+
+class TrackMatcher:
+    """Track Matcher"""
+    def __init__(self, threshold: float = 0.85):
+        """
+        Initialize the TrackMatcher with a similarity threshold.
+
+        Args:
+            threshold (float): Minimum similarity score to consider a match valid
+                             (between 0 and 1, default 0.85)
+        """
+        self.threshold = threshold
+
+    def find_best_match(self, input_track: str, candidate_tracks: List[str]) -> Optional[Tuple[str, float]]:
+        """
+        Find the best matching track from the candidate list.
+
+        Args:
+            input_track (str): Input track in "ARTIST - SONG" format
+            candidate_tracks (List[str]): List of candidate tracks in same format
+
+        Returns:
+            Optional[Tuple[str, float]]: Tuple of (best matching track, similarity score)
+                                       or None if no good match found
+        """
+        if not input_track or not candidate_tracks:
+            return None
+
+        # Normalize the input once; each candidate is normalized inside the loop
+        normalized_input = self._normalize_string(input_track)
+
+        best_match = None
+        best_score = 0.0
+
+        for candidate in candidate_tracks:
+            normalized_candidate = self._normalize_string(candidate)
+
+            # Calculate various similarity scores
+            exact_score = 1.0 if normalized_input == normalized_candidate else 0.0
+            sequence_score = SequenceMatcher(None, normalized_input, normalized_candidate).ratio()
+            token_score = self._calculate_token_similarity(normalized_input, normalized_candidate)
+
+            # Take the maximum of the different scoring methods
+            final_score = max(exact_score, sequence_score, token_score)
+
+            if final_score > best_score:
+                best_score = final_score
+                best_match = candidate
+
+        # best_match stays None when every candidate scored 0; never return (None, score)
+        if best_match is None or best_score < self.threshold:
+            return None
+        return best_match, best_score
+
+    def _normalize_string(self, text: str) -> str:
+        """
+        Normalize string for comparison by removing special characters,
+        extra spaces, and converting to lowercase.
+        """
+        # Lowercase, then drop everything except word characters, whitespace and '-'
+        text = re.sub(r'[^\w\s-]', '', text.lower())
+        # Normalize spaces
+        return ' '.join(text.split())
+
+    def _calculate_token_similarity(self, str1: str, str2: str) -> float:
+        """
+        Calculate similarity based on matching tokens (words): the number of
+        shared tokens divided by the number of distinct tokens in both strings.
+        """
+        tokens1 = set(str1.split())
+        tokens2 = set(str2.split())
+
+        if not tokens1 or not tokens2:
+            return 0.0
+
+        return len(tokens1 & tokens2) / len(tokens1 | tokens2)
+
+
+# Example usage (kept below the class: TrackMatcher must be defined before this runs):
+if __name__ == "__main__":
+    matcher = TrackMatcher(threshold=0.85)
+
+    candidate_tracks = [
+        "The Beatles - Hey Jude",
+        "Led Zeppelin - Stairway to Heaven",
+        "Queen - Bohemian Rhapsody",
+        "Pink Floyd - Comfortably Numb",
+        "The Beatles - Hey Jules",  # Intentionally similar to "Hey Jude"
+    ]
+
+    # Test cases
+    test_tracks = [
+        "The Beatles - Hey Jude",  # Exact match
+        "Beatles - Hey Jude",      # Similar match
+        "The Beatles - Hey Jules", # Similar but different
+        "Metallica - Nothing Else Matters",  # No match
+        "Queen - bohemian rhapsody",  # Different case
+    ]
+
+    for test_track in test_tracks:
+        result = matcher.find_best_match(test_track, candidate_tracks)
+        if result:
+            match, score = result
+            print(f"Input: {test_track}")
+            print(f"Best match: {match}")
+            print(f"Similarity score: {score:.3f}\n")
+        else:
+            print(f"No good match found for: {test_track}\n")