allow for excluding cache on lyrics search (temp?) + addl genius fixes
This commit is contained in:
@ -3,6 +3,7 @@ sys.path.insert(1,'..')
|
||||
import traceback
|
||||
import logging
|
||||
import time
|
||||
import re
|
||||
from typing import Optional
|
||||
from aiohttp import ClientTimeout, ClientSession
|
||||
from bs4 import BeautifulSoup, ResultSet # type: ignore
|
||||
@ -101,16 +102,23 @@ class Genius:
|
||||
|
||||
|
||||
html = BeautifulSoup(htm.unescape(scrape_text).replace('<br/>', '\n'), "html.parser")
|
||||
header_tags: Optional[ResultSet] = html.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'span'])
|
||||
if header_tags:
|
||||
for tag in header_tags:
|
||||
tag.extract()
|
||||
divs: Optional[ResultSet] = html.find_all("div", {"data-lyrics-container": "true"})
|
||||
|
||||
header_tags_genius: Optional[ResultSet] = html.find_all(class_=re.compile('.*Header.*'))
|
||||
if header_tags_genius:
|
||||
for tag in header_tags_genius:
|
||||
tag.extract()
|
||||
|
||||
divs: Optional[ResultSet] = html.find_all("div", {"data-lyrics-container": "true"})
|
||||
|
||||
if not divs:
|
||||
return
|
||||
|
||||
for div in divs:
|
||||
header_tags: Optional[ResultSet] = div.find_all(['h1', 'h2', 'h3', 'h4', 'h5'])
|
||||
if header_tags:
|
||||
for tag in header_tags:
|
||||
tag.extract()
|
||||
|
||||
returned_lyrics += div.get_text()
|
||||
|
||||
returned_lyrics: str = self.datautils.scrub_lyrics(returned_lyrics)
|
||||
|
Reference in New Issue
Block a user