WIP
This commit is contained in:
parent
efe933a185
commit
725e463992
0
lyric_search_new/__init__.py
Normal file
0
lyric_search_new/__init__.py
Normal file
4
lyric_search_new/sources/__init__.py
Normal file
4
lyric_search_new/sources/__init__.py
Normal file
@ -0,0 +1,4 @@
|
||||
from . import cache
|
||||
from . import genius
|
||||
from . import spotify
|
||||
from . import common
|
8
lyric_search_new/sources/cache.py
Normal file
8
lyric_search_new/sources/cache.py
Normal file
@ -0,0 +1,8 @@
|
||||
#!/usr/bin/env python3.12
|
||||
|
||||
class Cache:
    """Cache-backed lyric search source (placeholder, no behavior yet)."""

    def __init__(self):
        """Create the cache source; nothing is initialized at this stage."""
|
||||
|
||||
|
5
lyric_search_new/sources/common.py
Normal file
5
lyric_search_new/sources/common.py
Normal file
@ -0,0 +1,5 @@
|
||||
#!/usr/bin/env python3.12
|
||||
# Shared HTTP request headers: accept any content type and present a
# desktop Firefox User-Agent string.
SCRAPE_HEADERS = {
    "accept": "*/*",
    "User-Agent": (
        "Mozilla/5.0 (X11; Linux x86_64; rv:130.0) "
        "Gecko/20100101 Firefox/130.0"
    ),
}
|
25
lyric_search_new/sources/genius.py
Normal file
25
lyric_search_new/sources/genius.py
Normal file
@ -0,0 +1,25 @@
|
||||
#!/usr/bin/env python3.12
|
||||
|
||||
from .. import private
|
||||
from . import common
|
||||
from aiohttp import ClientTimeout, ClientSession, ClientError
|
||||
|
||||
class Genius:
    """Genius lyric search source."""

    def __init__(self):
        """Store the Genius URLs, request headers and HTTP timeouts."""
        base_url = private.genius_url
        self.genius_url = base_url
        # NOTE(review): the path is appended without a separator, so this
        # presumably expects genius_url to end with "/" -- confirm.
        self.genius_search_url = f'{base_url}api/search/song?q='
        # Same dict object as common.SCRAPE_HEADERS (not a copy).
        self.headers = common.SCRAPE_HEADERS
        self.timeout = ClientTimeout(connect=2, sock_read=2.5)

    async def search(self, artist: str, song: str):
        """
        Search for a song (work in progress: there is no body yet, so
        awaiting this coroutine currently yields None).

        @artist: the artist to search
        @song: the song to search
        """
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
0
lyric_search_new/sources/spotify.py
Normal file
0
lyric_search_new/sources/spotify.py
Normal file
113
lyric_search_new/utils.py
Normal file
113
lyric_search_new/utils.py
Normal file
@ -0,0 +1,113 @@
|
||||
#!/usr/bin/env python3.12
|
||||
|
||||
from difflib import SequenceMatcher
|
||||
from typing import List, Optional, Tuple
|
||||
import re
|
||||
|
||||
class TrackMatcher:
    """Fuzzy matcher for track strings in "ARTIST - SONG" format."""

    # Characters removed during normalization: everything that is not a
    # word character, whitespace or a hyphen. Compiled once for reuse.
    _SPECIAL_CHARS = re.compile(r'[^\w\s-]')

    def __init__(self, threshold: float = 0.85):
        """
        Initialize the TrackMatcher with a similarity threshold.

        Args:
            threshold (float): Minimum similarity score to consider a match valid
                (between 0 and 1, default 0.85)
        """
        self.threshold = threshold

    def find_best_match(self, input_track: str, candidate_tracks: List[str]) -> Optional[Tuple[str, float]]:
        """
        Find the best matching track from the candidate list.

        Each candidate is scored with the maximum of an exact comparison,
        a difflib sequence ratio and a token (word) overlap ratio, all
        computed on normalized strings. On ties the earliest candidate wins.

        Args:
            input_track (str): Input track in "ARTIST - SONG" format
            candidate_tracks (List[str]): List of candidate tracks in same format

        Returns:
            Optional[Tuple[str, float]]: Tuple of (best matching track, similarity score)
                or None if no good match found. The returned track is the
                original (un-normalized) candidate string.
        """
        if not input_track or not candidate_tracks:
            return None

        normalized_input = self._normalize_string(input_track)

        best_match: Optional[str] = None
        # Start below the lowest possible score so the first candidate is
        # always recorded; this guarantees a (None, score) tuple can never
        # be returned, even with a threshold of 0.
        best_score = -1.0

        for candidate in candidate_tracks:
            normalized_candidate = self._normalize_string(candidate)

            if normalized_input == normalized_candidate:
                # Exact match after normalization: no fuzzy scoring needed.
                final_score = 1.0
            else:
                sequence_score = SequenceMatcher(None, normalized_input, normalized_candidate).ratio()
                token_score = self._calculate_token_similarity(normalized_input, normalized_candidate)
                # Take the maximum of the different scoring methods
                final_score = max(sequence_score, token_score)

            # Strict comparison keeps the earliest candidate on ties.
            if final_score > best_score:
                best_score = final_score
                best_match = candidate

        # Return the match only if it meets the threshold
        if best_match is None or best_score < self.threshold:
            return None
        return best_match, best_score

    def _normalize_string(self, text: str) -> str:
        """
        Normalize string for comparison by removing special characters,
        extra spaces, and converting to lowercase.
        """
        # Remove special characters and convert to lowercase
        text = self._SPECIAL_CHARS.sub('', text.lower())
        # Collapse runs of whitespace and trim both ends
        return ' '.join(text.split())

    def _calculate_token_similarity(self, str1: str, str2: str) -> float:
        """
        Calculate similarity based on matching tokens (words).

        Returns the size of the intersection of the two whitespace-split
        token sets divided by the size of their union, or 0.0 when either
        string has no tokens.
        """
        tokens1 = set(str1.split())
        tokens2 = set(str2.split())

        if not tokens1 or not tokens2:
            return 0.0

        return len(tokens1 & tokens2) / len(tokens1 | tokens2)


def _run_examples() -> None:
    """Print the best match for a handful of sample tracks."""
    matcher = TrackMatcher(threshold=0.85)

    candidate_tracks = [
        "The Beatles - Hey Jude",
        "Led Zeppelin - Stairway to Heaven",
        "Queen - Bohemian Rhapsody",
        "Pink Floyd - Comfortably Numb",
        "The Beatles - Hey Jules",  # Intentionally similar to "Hey Jude"
    ]

    # Test cases
    test_tracks = [
        "The Beatles - Hey Jude",  # Exact match
        "Beatles - Hey Jude",  # Similar match
        "The Beatles - Hey Jules",  # Similar but different
        "Metallica - Nothing Else Matters",  # No match
        "Queen - bohemian rhapsody",  # Different case
    ]

    for test_track in test_tracks:
        result = matcher.find_best_match(test_track, candidate_tracks)
        if result:
            match, score = result
            print(f"Input: {test_track}")
            print(f"Best match: {match}")
            print(f"Similarity score: {score:.3f}\n")
        else:
            print(f"No good match found for: {test_track}\n")


# Example usage. The guard must come after TrackMatcher is defined,
# otherwise running this module as a script raises NameError.
if __name__ == "__main__":
    _run_examples()
|
Loading…
x
Reference in New Issue
Block a user