diff --git a/lyric_search_new/__init__.py b/lyric_search_new/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/lyric_search_new/sources/__init__.py b/lyric_search_new/sources/__init__.py
new file mode 100644
index 0000000..ef1cd04
--- /dev/null
+++ b/lyric_search_new/sources/__init__.py
@@ -0,0 +1,4 @@
+from . import cache
+from . import genius
+from . import spotify
+from . import common
\ No newline at end of file
diff --git a/lyric_search_new/sources/cache.py b/lyric_search_new/sources/cache.py
new file mode 100644
index 0000000..461013e
--- /dev/null
+++ b/lyric_search_new/sources/cache.py
@@ -0,0 +1,8 @@
+#!/usr/bin/env python3.12
+
+class Cache:
+    """Cache Search Module"""
+    def __init__(self):
+        pass
+
+
diff --git a/lyric_search_new/sources/common.py b/lyric_search_new/sources/common.py
new file mode 100644
index 0000000..4b8be2b
--- /dev/null
+++ b/lyric_search_new/sources/common.py
@@ -0,0 +1,5 @@
+#!/usr/bin/env python3.12
+SCRAPE_HEADERS = {
+    'accept': '*/*',
+    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:130.0) Gecko/20100101 Firefox/130.0',
+    }
\ No newline at end of file
diff --git a/lyric_search_new/sources/genius.py b/lyric_search_new/sources/genius.py
new file mode 100644
index 0000000..1c98e11
--- /dev/null
+++ b/lyric_search_new/sources/genius.py
@@ -0,0 +1,25 @@
+#!/usr/bin/env python3.12
+
+from .. import private
+from . import common
+from aiohttp import ClientTimeout, ClientSession, ClientError
+
+class Genius:
+    """Genius Search Module"""
+    def __init__(self):
+        self.genius_url = private.genius_url
+        self.genius_search_url = f'{self.genius_url}api/search/song?q='
+        self.headers = common.SCRAPE_HEADERS
+        self.timeout = ClientTimeout(connect=2, sock_read=2.5)
+
+    async def search(self, artist: str, song: str):
+        """
+        @artist: the artist to search
+        @song: the song to search
+        """
+
+
+
+
+
+
diff --git a/lyric_search_new/sources/spotify.py b/lyric_search_new/sources/spotify.py
new file mode 100644
index 0000000..e69de29
diff --git a/lyric_search_new/utils.py b/lyric_search_new/utils.py
new file mode 100644
index 0000000..3ae7351
--- /dev/null
+++ b/lyric_search_new/utils.py
@@ -0,0 +1,117 @@
+#!/usr/bin/env python3.12
+
+from difflib import SequenceMatcher
+from typing import List, Optional, Tuple
+import re
+
+class TrackMatcher:
+    """Track Matcher"""
+    def __init__(self, threshold: float = 0.85):
+        """
+        Initialize the TrackMatcher with a similarity threshold.
+
+        Args:
+            threshold (float): Minimum similarity score to consider a match valid
+                (between 0 and 1, default 0.85)
+        """
+        self.threshold = threshold
+
+    def find_best_match(self, input_track: str, candidate_tracks: List[str]) -> Optional[Tuple[str, float]]:
+        """
+        Find the best matching track from the candidate list.
+
+        Args:
+            input_track (str): Input track in "ARTIST - SONG" format
+            candidate_tracks (List[str]): List of candidate tracks in same format
+
+        Returns:
+            Optional[Tuple[str, float]]: Tuple of (best matching track, similarity score)
+                or None if no good match found
+        """
+        if not input_track or not candidate_tracks:
+            return None
+
+        # Normalize input track
+        input_track = self._normalize_string(input_track)
+
+        best_match = None
+        best_score = 0.0
+
+        for candidate in candidate_tracks:
+            normalized_candidate = self._normalize_string(candidate)
+
+            # Calculate various similarity scores
+            exact_score = 1.0 if input_track == normalized_candidate else 0.0
+            sequence_score = SequenceMatcher(None, input_track, normalized_candidate).ratio()
+            token_score = self._calculate_token_similarity(input_track, normalized_candidate)
+
+            # Take the maximum of the different scoring methods
+            final_score = max(exact_score, sequence_score, token_score)
+
+            if final_score > best_score:
+                best_score = final_score
+                best_match = candidate
+
+        # Return the match only if it meets the threshold. best_match is still
+        # None when every candidate scored 0, so guard it explicitly: otherwise
+        # a threshold of 0 would return (None, 0.0).
+        if best_match is None or best_score < self.threshold:
+            return None
+        return best_match, best_score
+
+    def _normalize_string(self, text: str) -> str:
+        """
+        Normalize string for comparison by removing special characters,
+        extra spaces, and converting to lowercase.
+        """
+        # Remove special characters and convert to lowercase
+        text = re.sub(r'[^\w\s-]', '', text.lower())
+        # Normalize spaces
+        text = ' '.join(text.split())
+        return text
+
+    def _calculate_token_similarity(self, str1: str, str2: str) -> float:
+        """
+        Calculate similarity based on matching tokens (words).
+        """
+        tokens1 = set(str1.split())
+        tokens2 = set(str2.split())
+
+        if not tokens1 or not tokens2:
+            return 0.0
+
+        intersection = tokens1.intersection(tokens2)
+        union = tokens1.union(tokens2)
+
+        return len(intersection) / len(union)
+
+# Example usage (kept after TrackMatcher so the class is defined when run as a script):
+if __name__ == "__main__":
+    matcher = TrackMatcher(threshold=0.85)
+
+    candidate_tracks = [
+        "The Beatles - Hey Jude",
+        "Led Zeppelin - Stairway to Heaven",
+        "Queen - Bohemian Rhapsody",
+        "Pink Floyd - Comfortably Numb",
+        "The Beatles - Hey Jules",  # Intentionally similar to "Hey Jude"
+    ]
+
+    # Test cases
+    test_tracks = [
+        "The Beatles - Hey Jude",  # Exact match
+        "Beatles - Hey Jude",  # Similar match
+        "The Beatles - Hey Jules",  # Similar but different
+        "Metallica - Nothing Else Matters",  # No match
+        "Queen - bohemian rhapsody",  # Different case
+    ]
+
+    for test_track in test_tracks:
+        result = matcher.find_best_match(test_track, candidate_tracks)
+        if result:
+            match, score = result
+            print(f"Input: {test_track}")
+            print(f"Best match: {match}")
+            print(f"Similarity score: {score:.3f}\n")
+        else:
+            print(f"No good match found for: {test_track}\n")