The Genius site was updated: the lyrics header/contributor info was relocated into a LyricsContainer div, which resulted in that header/garbage being prefixed to the returned lyrics. Resolved by finding the unwanted tags and extracting them from the HTML.

This commit is contained in:
codey 2025-04-07 11:08:07 -04:00
parent 8958636232
commit fed5307386
2 changed files with 7 additions and 3 deletions

View File

@ -101,6 +101,10 @@ class Genius:
html = BeautifulSoup(htm.unescape(scrape_text).replace('<br/>', '\n'), "html.parser")
header_tags: Optional[ResultSet] = html.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'span'])
if header_tags:
for tag in header_tags:
tag.extract()
divs: Optional[ResultSet] = html.find_all("div", {"data-lyrics-container": "true"})
if not divs:

View File

@ -147,7 +147,7 @@ class RadioUtil:
"""
db_query: str = """SELECT distinct(LOWER(TRIM(artist)) || " - " || LOWER(TRIM(song))), (TRIM(artist) || " - " || TRIM(song)) AS artistdashsong, id, artist, song, album, genre, file_path, duration FROM tracks\
WHERE id >= 67166 AND (genre LIKE "%metalcore%"\
WHERE (genre LIKE "%metalcore%"\
OR genre LIKE "%rock%"\
OR genre LIKE "%pop punk%"\
OR genre LIKE "%math rock%"\
@ -187,14 +187,14 @@ class RadioUtil:
"""
# db_query = 'SELECT distinct(artist || " - " || song) AS artistdashsong, id, artist, song, album, genre, file_path, duration FROM tracks\
# WHERE genre like "%hip hop%" OR genre LIKE "%rap%" OR genre LIKE "%edm%" OR genre LIKE "%trap%"'
# WHERE artist LIKE "%tethered%" OR artist LIKE "%more of mysel%" ORDER BY artist DESC, album ASC, song ASC'
"""
LIMITED TO ONE/SOME ARTISTS...
"""
# db_query = 'SELECT distinct(artist || " - " || song) AS artistdashsong, id, artist, song, album, genre, file_path, duration FROM tracks\
# WHERE (artist LIKE "%bayside%") AND (NOT song LIKE "%%stripped%%" AND NOT song LIKE "%(live%%" AND NOT song LIKE "%%acoustic%%" AND NOT song LIKE "%%instrumental%%" AND NOT song LIKE "%%remix%%" AND NOT song LIKE "%%reimagined%%" AND NOT song LIKE "%%alternative%%" AND NOT song LIKE "%%unzipped%%") GROUP BY artistdashsong ORDER BY artist DESC, album DESC, id ASC'
# WHERE (artist LIKE "%sullivan king%" OR artist LIKE "%kayzo%" OR artist like "%kai wachi%" OR genre LIKE "%edm%" OR genre LIKE "%electronicore%") AND (NOT song LIKE "%%stripped%%" AND NOT song LIKE "%(2022)%" AND NOT song LIKE "%(live%%" AND NOT song LIKE "%%acoustic%%" AND NOT song LIKE "%%instrumental%%" AND NOT song LIKE "%%remix%%" AND NOT song LIKE "%%reimagined%%" AND NOT song LIKE "%%alternative%%" AND NOT song LIKE "%%unzipped%%") GROUP BY artistdashsong ORDER BY RANDOM()'# ORDER BY album ASC, id ASC'
async with sqlite3.connect(self.active_playlist_path,
timeout=2) as db_conn: