Allow excluding the cache on lyrics search (temporary?) + additional Genius fixes

This commit is contained in:
2025-04-08 11:27:56 -04:00
parent fed5307386
commit c4ae59ca9f
3 changed files with 17 additions and 9 deletions

View File

@@ -3,6 +3,7 @@ sys.path.insert(1,'..')
import traceback
import logging
import time
import re
from typing import Optional
from aiohttp import ClientTimeout, ClientSession
from bs4 import BeautifulSoup, ResultSet # type: ignore
@@ -101,16 +102,23 @@ class Genius:
html = BeautifulSoup(htm.unescape(scrape_text).replace('<br/>', '\n'), "html.parser")
header_tags: Optional[ResultSet] = html.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'span'])
if header_tags:
for tag in header_tags:
tag.extract()
divs: Optional[ResultSet] = html.find_all("div", {"data-lyrics-container": "true"})
header_tags_genius: Optional[ResultSet] = html.find_all(class_=re.compile('.*Header.*'))
if header_tags_genius:
for tag in header_tags_genius:
tag.extract()
divs: Optional[ResultSet] = html.find_all("div", {"data-lyrics-container": "true"})
if not divs:
return
for div in divs:
header_tags: Optional[ResultSet] = div.find_all(['h1', 'h2', 'h3', 'h4', 'h5'])
if header_tags:
for tag in header_tags:
tag.extract()
returned_lyrics += div.get_text()
returned_lyrics: str = self.datautils.scrub_lyrics(returned_lyrics)