radio_util: open tracks SQLite DB in readonly mode; black: reformat files
This commit is contained in:
@ -1,29 +1,31 @@
|
||||
import sys
|
||||
sys.path.insert(1,'..')
|
||||
|
||||
sys.path.insert(1, "..")
|
||||
import traceback
|
||||
import logging
|
||||
import time
|
||||
import re
|
||||
from typing import Optional
|
||||
from aiohttp import ClientTimeout, ClientSession
|
||||
from bs4 import BeautifulSoup, ResultSet # type: ignore
|
||||
from bs4 import BeautifulSoup, ResultSet # type: ignore
|
||||
import html as htm
|
||||
from . import private, common, cache, redis_cache
|
||||
from lyric_search import utils
|
||||
from lyric_search.constructors import (
|
||||
LyricsResult, InvalidGeniusResponseException)
|
||||
from lyric_search.constructors import LyricsResult, InvalidGeniusResponseException
|
||||
|
||||
logger = logging.getLogger()
|
||||
log_level = logging.getLevelName(logger.level)
|
||||
|
||||
|
||||
class Genius:
|
||||
"""
|
||||
Genius Search Module
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self.label: str = "Genius"
|
||||
self.genius_url: str = private.GENIUS_URL
|
||||
self.genius_search_url: str = f'{self.genius_url}api/search/song?q='
|
||||
self.genius_search_url: str = f"{self.genius_url}api/search/song?q="
|
||||
self.headers: dict = common.SCRAPE_HEADERS
|
||||
self.timeout = ClientTimeout(connect=3, sock_read=5)
|
||||
self.datautils = utils.DataUtils()
|
||||
@ -31,8 +33,7 @@ class Genius:
|
||||
self.cache = cache.Cache()
|
||||
self.redis_cache = redis_cache.RedisCache()
|
||||
|
||||
async def search(self, artist: str, song: str,
|
||||
**kwargs) -> Optional[LyricsResult]:
|
||||
async def search(self, artist: str, song: str, **kwargs) -> Optional[LyricsResult]:
|
||||
"""
|
||||
Genius Search
|
||||
Args:
|
||||
@ -45,96 +46,125 @@ class Genius:
|
||||
artist: str = artist.strip().lower()
|
||||
song: str = song.strip().lower()
|
||||
time_start: float = time.time()
|
||||
logging.info("Searching %s - %s on %s",
|
||||
artist, song, self.label)
|
||||
search_term: str = f'{artist}%20{song}'
|
||||
returned_lyrics: str = ''
|
||||
logging.info("Searching %s - %s on %s", artist, song, self.label)
|
||||
search_term: str = f"{artist}%20{song}"
|
||||
returned_lyrics: str = ""
|
||||
async with ClientSession() as client:
|
||||
async with client.get(f'{self.genius_search_url}{search_term}',
|
||||
timeout=self.timeout,
|
||||
headers=self.headers) as request:
|
||||
async with client.get(
|
||||
f"{self.genius_search_url}{search_term}",
|
||||
timeout=self.timeout,
|
||||
headers=self.headers,
|
||||
) as request:
|
||||
request.raise_for_status()
|
||||
text: Optional[str] = await request.text()
|
||||
|
||||
|
||||
if not text:
|
||||
raise InvalidGeniusResponseException("No search response.")
|
||||
|
||||
|
||||
if len(text) < 100:
|
||||
raise InvalidGeniusResponseException("Search response text was invalid (len < 100 chars.)")
|
||||
raise InvalidGeniusResponseException(
|
||||
"Search response text was invalid (len < 100 chars.)"
|
||||
)
|
||||
search_data = await request.json()
|
||||
|
||||
|
||||
if not isinstance(search_data, dict):
|
||||
raise InvalidGeniusResponseException("Invalid JSON.")
|
||||
|
||||
if not isinstance(search_data['response'], dict):
|
||||
raise InvalidGeniusResponseException(f"Invalid JSON: Cannot find response key.\n{search_data}")
|
||||
|
||||
if not isinstance(search_data['response']['sections'], list):
|
||||
raise InvalidGeniusResponseException(f"Invalid JSON: Cannot find response->sections key.\n{search_data}")
|
||||
|
||||
if not isinstance(search_data['response']['sections'][0]['hits'], list):
|
||||
raise InvalidGeniusResponseException("Invalid JSON: Cannot find response->sections[0]->hits key.")
|
||||
|
||||
possible_matches: list = search_data['response']['sections'][0]['hits']
|
||||
|
||||
if not isinstance(search_data["response"], dict):
|
||||
raise InvalidGeniusResponseException(
|
||||
f"Invalid JSON: Cannot find response key.\n{search_data}"
|
||||
)
|
||||
|
||||
if not isinstance(search_data["response"]["sections"], list):
|
||||
raise InvalidGeniusResponseException(
|
||||
f"Invalid JSON: Cannot find response->sections key.\n{search_data}"
|
||||
)
|
||||
|
||||
if not isinstance(
|
||||
search_data["response"]["sections"][0]["hits"], list
|
||||
):
|
||||
raise InvalidGeniusResponseException(
|
||||
"Invalid JSON: Cannot find response->sections[0]->hits key."
|
||||
)
|
||||
|
||||
possible_matches: list = search_data["response"]["sections"][0][
|
||||
"hits"
|
||||
]
|
||||
to_scrape: list[tuple] = [
|
||||
(
|
||||
returned['result']['path'],
|
||||
f'{returned['result']['artist_names']} - {returned['result']['title']}',
|
||||
) for returned in possible_matches
|
||||
returned["result"]["path"],
|
||||
f"{returned['result']['artist_names']} - {returned['result']['title']}",
|
||||
)
|
||||
for returned in possible_matches
|
||||
]
|
||||
searched: str = f"{artist} - {song}"
|
||||
best_match: tuple = self.matcher.find_best_match(input_track=searched,
|
||||
candidate_tracks=to_scrape)
|
||||
best_match: tuple = self.matcher.find_best_match(
|
||||
input_track=searched, candidate_tracks=to_scrape
|
||||
)
|
||||
((scrape_stub, track), confidence) = best_match
|
||||
scrape_url: str = f'{self.genius_url}{scrape_stub[1:]}'
|
||||
|
||||
async with client.get(scrape_url,
|
||||
timeout=self.timeout,
|
||||
headers=self.headers) as scrape_request:
|
||||
scrape_url: str = f"{self.genius_url}{scrape_stub[1:]}"
|
||||
|
||||
async with client.get(
|
||||
scrape_url, timeout=self.timeout, headers=self.headers
|
||||
) as scrape_request:
|
||||
scrape_request.raise_for_status()
|
||||
scrape_text: Optional[str] = await scrape_request.text()
|
||||
|
||||
|
||||
if not scrape_text:
|
||||
raise InvalidGeniusResponseException("No scrape response.")
|
||||
|
||||
|
||||
if len(scrape_text) < 100:
|
||||
raise InvalidGeniusResponseException("Scrape response was invalid (len < 100 chars.)")
|
||||
|
||||
|
||||
html = BeautifulSoup(htm.unescape(scrape_text).replace('<br/>', '\n'), "html.parser")
|
||||
|
||||
header_tags_genius: Optional[ResultSet] = html.find_all(class_=re.compile(r'.*Header.*'))
|
||||
raise InvalidGeniusResponseException(
|
||||
"Scrape response was invalid (len < 100 chars.)"
|
||||
)
|
||||
|
||||
html = BeautifulSoup(
|
||||
htm.unescape(scrape_text).replace("<br/>", "\n"),
|
||||
"html.parser",
|
||||
)
|
||||
|
||||
header_tags_genius: Optional[ResultSet] = html.find_all(
|
||||
class_=re.compile(r".*Header.*")
|
||||
)
|
||||
if header_tags_genius:
|
||||
for tag in header_tags_genius:
|
||||
tag.extract()
|
||||
|
||||
divs: Optional[ResultSet] = html.find_all("div", {"data-lyrics-container": "true"})
|
||||
|
||||
|
||||
divs: Optional[ResultSet] = html.find_all(
|
||||
"div", {"data-lyrics-container": "true"}
|
||||
)
|
||||
|
||||
if not divs:
|
||||
return
|
||||
|
||||
|
||||
for div in divs:
|
||||
header_tags: Optional[ResultSet] = div.find_all(['h1', 'h2', 'h3', 'h4', 'h5'])
|
||||
header_tags: Optional[ResultSet] = div.find_all(
|
||||
["h1", "h2", "h3", "h4", "h5"]
|
||||
)
|
||||
if header_tags:
|
||||
for tag in header_tags:
|
||||
tag.extract()
|
||||
tag.extract()
|
||||
|
||||
returned_lyrics += div.get_text()
|
||||
|
||||
returned_lyrics: str = self.datautils.scrub_lyrics(returned_lyrics)
|
||||
|
||||
returned_lyrics: str = self.datautils.scrub_lyrics(
|
||||
returned_lyrics
|
||||
)
|
||||
artist: str = track.split(" - ", maxsplit=1)[0]
|
||||
song: str = track.split(" - ", maxsplit=1)[1]
|
||||
logging.info("Result found on %s", self.label)
|
||||
time_end: float = time.time()
|
||||
time_diff: float = time_end - time_start
|
||||
matched = LyricsResult(artist=artist,
|
||||
song=song,
|
||||
src=self.label,
|
||||
lyrics=returned_lyrics,
|
||||
confidence=confidence,
|
||||
time=time_diff)
|
||||
matched = LyricsResult(
|
||||
artist=artist,
|
||||
song=song,
|
||||
src=self.label,
|
||||
lyrics=returned_lyrics,
|
||||
confidence=confidence,
|
||||
time=time_diff,
|
||||
)
|
||||
await self.redis_cache.increment_found_count(self.label)
|
||||
await self.cache.store(matched)
|
||||
return matched
|
||||
except:
|
||||
traceback.print_exc()
|
||||
traceback.print_exc()
|
||||
|
Reference in New Issue
Block a user