diff --git a/lyric_search/sources/aggregate.py b/lyric_search/sources/aggregate.py index ab2e4db..9e44ec4 100644 --- a/lyric_search/sources/aggregate.py +++ b/lyric_search/sources/aggregate.py @@ -47,7 +47,7 @@ class Aggregate: search_result: Optional[LyricsResult] = None for source in sources: if source.label.lower() in self.exclude_methods: - if source.label.lower() == "cache" or not plain: + if not plain: logging.info("Exclude conditions rejected - source requested to exclude: %s, plain: %s", source.label, plain) else: diff --git a/lyric_search/sources/genius.py b/lyric_search/sources/genius.py index 272d5a1..667b18f 100644 --- a/lyric_search/sources/genius.py +++ b/lyric_search/sources/genius.py @@ -3,6 +3,7 @@ sys.path.insert(1,'..') import traceback import logging import time +import re from typing import Optional from aiohttp import ClientTimeout, ClientSession from bs4 import BeautifulSoup, ResultSet # type: ignore @@ -101,16 +102,23 @@ class Genius: html = BeautifulSoup(htm.unescape(scrape_text).replace('
', '\n'), "html.parser") - header_tags: Optional[ResultSet] = html.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'span']) - if header_tags: - for tag in header_tags: - tag.extract() - divs: Optional[ResultSet] = html.find_all("div", {"data-lyrics-container": "true"}) + header_tags_genius: Optional[ResultSet] = html.find_all(class_=re.compile('.*Header.*')) + if header_tags_genius: + for tag in header_tags_genius: + tag.extract() + + divs: Optional[ResultSet] = html.find_all("div", {"data-lyrics-container": "true"}) + if not divs: return for div in divs: + header_tags: Optional[ResultSet] = div.find_all(['h1', 'h2', 'h3', 'h4', 'h5']) + if header_tags: + for tag in header_tags: + tag.extract() + returned_lyrics += div.get_text() returned_lyrics: str = self.datautils.scrub_lyrics(returned_lyrics) diff --git a/utils/radio_util.py b/utils/radio_util.py index 1762b45..688ff0a 100644 --- a/utils/radio_util.py +++ b/utils/radio_util.py @@ -186,15 +186,15 @@ class RadioUtil: LIMITED TO ONE/SMALL SUBSET OF GENRES """ - # db_query = 'SELECT distinct(artist || " - " || song) AS artistdashsong, id, artist, song, album, genre, file_path, duration FROM tracks\ - # WHERE artist LIKE "%tethered%" OR artist LIKE "%more of mysel%" ORDER BY artist DESC, album ASC, song ASC' + db_query = 'SELECT distinct(artist || " - " || song) AS artistdashsong, id, artist, song, album, genre, file_path, duration FROM tracks\ + WHERE genre LIKE "%edm%" OR artist LIKE "%sullivan king%" OR artist LIKE "%kai wachi%" OR artist LIKE "%kayzo%" ORDER BY RANDOM()' #ORDER BY artist DESC, album ASC, song ASC' """ LIMITED TO ONE/SOME ARTISTS... """ # db_query = 'SELECT distinct(artist || " - " || song) AS artistdashsong, id, artist, song, album, genre, file_path, duration FROM tracks\ - # WHERE (artist LIKE "%sullivan king%" OR artist LIKE "%kayzo%" OR artist like "%kai wachi%" OR genre LIKE "%edm%" OR genre LIKE "%electronicore%") AND (NOT song LIKE "%%stripped%%" AND NOT song LIKE "%(2022)%" AND NOT song LIKE "%(live%%" AND NOT song LIKE "%%acoustic%%" AND NOT song LIKE "%%instrumental%%" AND NOT song LIKE "%%remix%%" AND NOT song LIKE "%%reimagined%%" AND NOT song LIKE "%%alternative%%" AND NOT song LIKE "%%unzipped%%") GROUP BY artistdashsong ORDER BY RANDOM()'# ORDER BY album ASC, id ASC' + # WHERE (artist LIKE "%a scent like wolves%" OR artist LIKE "%bad wolves%" or artist LIKE "%oceans%" OR artist LIKE "%oh,%" OR artist LIKE "%august%" OR artist LIKE "%periphery%") AND (NOT song LIKE "%%stripped%%" AND NOT song LIKE "%(2022)%" AND NOT song LIKE "%(live%%" AND NOT song LIKE "%%acoustic%%" AND NOT song LIKE "%%instrumental%%" AND NOT song LIKE "%%remix%%" AND NOT song LIKE "%%reimagined%%" AND NOT song LIKE "%%alternative%%" AND NOT song LIKE "%%unzipped%%") GROUP BY artistdashsong ORDER BY RANDOM()'# ORDER BY album ASC, id ASC' async with sqlite3.connect(self.active_playlist_path, timeout=2) as db_conn: