More progress re: #34
- Change of direction: LRCLib searches from /lyric/search now use an internal cache, which is a PostgreSQL import of the LRCLib SQLite database. The switch to PostgreSQL was made for performance.
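
For context, the /lyric/search aggregation path now reaches LRCLib through lyric_search.sources.lrclib.LRCLib, which queries the local PostgreSQL cache instead of the remote lrclib.net API. A minimal usage sketch, assuming the plain keyword described in the docstring further down; the values and the asyncio entry point are illustrative only:

    import asyncio

    from lyric_search.sources.lrclib import LRCLib


    async def demo() -> None:
        # Hypothetical lookup; search() returns Optional[LyricsResult].
        result = await LRCLib().search(artist="eminem", song="rap god", plain=True)
        if result:
            # src is "LRCLib-Cache" per this commit; lyrics is plain text when plain=True.
            print(result.src, result.confidence, result.time)


    asyncio.run(demo())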
base.py (1 change)

@@ -108,7 +108,6 @@ routes: dict = {
     "lyrics": importlib.import_module("endpoints.lyric_search").LyricSearch(
         app, util, constants
     ),
-    "lrclib": importlib.import_module("endpoints.lrclib").LRCLib(app, util, constants),
     "yt": importlib.import_module("endpoints.yt").YT(app, util, constants),
     "radio": importlib.import_module("endpoints.radio").Radio(
         app, util, constants, loop
@@ -110,25 +110,6 @@ class ValidLyricRequest(BaseModel):
     }


-class ValidLRCLibRequest(BaseModel):
-    """
-    Request model for lyric search.
-
-    Attributes:
-    - **artist** (str): Artist.
-    - **song** (str): Song.
-    - **duration** (Optional[int]): Optional duration.
-    """
-
-    artist: Optional[str] = None
-    song: Optional[str] = None
-    duration: Optional[int] = None
-
-    model_config = {
-        "json_schema_extra": {"examples": [{"artist": "eminem", "song": "rap god"}]}
-    }
-
-
 class ValidTypeAheadRequest(BaseModel):
     """
     Request model for typeahead query.
@@ -1,199 +0,0 @@
-import urllib.parse
-from fastapi import FastAPI, HTTPException, Depends
-from fastapi_throttle import RateLimiter
-from fastapi.responses import JSONResponse
-from typing import Type, Optional
-from sqlalchemy import (
-    and_,
-    true,
-    Column,
-    Integer,
-    String,
-    Float,
-    Boolean,
-    DateTime,
-    ForeignKey,
-    UniqueConstraint,
-    create_engine,
-)
-from sqlalchemy.orm import Session, relationship
-from sqlalchemy.ext.declarative import declarative_base, DeclarativeMeta
-from sqlalchemy.orm import sessionmaker
-from .constructors import ValidLRCLibRequest
-from lyric_search.constructors import LRCLibResult
-from lyric_search import notifier
-from sqlalchemy.orm import foreign
-
-Base: Type[DeclarativeMeta] = declarative_base()
-
-
-class Tracks(Base):  # type: ignore
-    __tablename__ = "tracks"
-
-    id = Column(Integer, primary_key=True, autoincrement=True)
-    name = Column(String)
-    name_lower = Column(String, index=True)
-    artist_name = Column(String)
-    artist_name_lower = Column(String, index=True)
-    album_name = Column(String)
-    album_name_lower = Column(String, index=True)
-    duration = Column(Float, index=True)
-    last_lyrics_id = Column(Integer, ForeignKey("lyrics.id"), index=True)
-    created_at = Column(DateTime)
-    updated_at = Column(DateTime)
-
-    # Relationships
-    lyrics = relationship(
-        "Lyrics",
-        back_populates="track",
-        foreign_keys=[last_lyrics_id],
-        primaryjoin="Tracks.id == foreign(Lyrics.track_id)",  # Use string reference for Lyrics
-    )
-
-    # Constraints
-    __table_args__ = (
-        UniqueConstraint(
-            "name_lower",
-            "artist_name_lower",
-            "album_name_lower",
-            "duration",
-            name="uq_tracks",
-        ),
-    )
-
-
-class Lyrics(Base):  # type: ignore
-    __tablename__ = "lyrics"
-
-    id = Column(Integer, primary_key=True, autoincrement=True)
-    plain_lyrics = Column(String)
-    synced_lyrics = Column(String)
-    track_id = Column(Integer, ForeignKey("tracks.id"), index=True)
-    has_plain_lyrics = Column(Boolean, index=True)
-    has_synced_lyrics = Column(Boolean, index=True)
-    instrumental = Column(Boolean)
-    source = Column(String, index=True)
-    created_at = Column(DateTime, index=True)
-    updated_at = Column(DateTime)
-
-    # Relationships
-    track = relationship(
-        "Tracks",
-        back_populates="lyrics",
-        foreign_keys=[track_id],
-        primaryjoin=(Tracks.id == foreign(track_id)),
-        remote_side=Tracks.id,
-    )
-
-
-DATABASE_URL: str = "sqlite:////nvme/sqlite_dbs/lrclib.db"
-engine = create_engine(DATABASE_URL, connect_args={"check_same_thread": False})
-SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
-
-
-def get_db():
-    db = SessionLocal()
-    try:
-        yield db
-    finally:
-        db.close()
-
-
-"""
-TODO:
-- Move retrieval to lyric_search.sources, with separate file for DB Model
-"""
-
-
-class LRCLib(FastAPI):
-    """
-    LRCLib Cache Search Endpoint
-    """
-
-    def __init__(self, app: FastAPI, util, constants) -> None:
-        """Initialize LyricSearch endpoints."""
-        self.app: FastAPI = app
-        self.util = util
-        self.constants = constants
-        self.declarative_base = declarative_base()
-        self.notifier = notifier.DiscordNotifier()
-
-        self.endpoints: dict = {
-            "lrclib/search": self.lyric_search_handler,
-        }
-
-        for endpoint, handler in self.endpoints.items():
-            times: int = 20
-            seconds: int = 2
-            rate_limit: tuple[int, int] = (2, 3)  # Default; (Times, Seconds)
-            (times, seconds) = rate_limit
-
-            app.add_api_route(
-                f"/{endpoint}",
-                handler,
-                methods=["POST"],
-                include_in_schema=True,
-                dependencies=[Depends(RateLimiter(times=times, seconds=seconds))],
-            )
-
-    async def lyric_search_handler(
-        self, data: ValidLRCLibRequest, db: Session = Depends(get_db)
-    ) -> JSONResponse:
-        """
-        Search for lyrics.
-
-        Parameters:
-        - **data** (ValidLRCLibRequest): Request containing artist, song, and other parameters.
-
-        Returns:
-        - **JSONResponse**: LRCLib data or error.
-        """
-        if not data.artist or not data.song:
-            raise HTTPException(detail="Invalid request", status_code=500)
-
-        search_artist: str = urllib.parse.unquote(data.artist).lower()
-        search_song: str = urllib.parse.unquote(data.song).lower()
-        search_duration: Optional[int] = data.duration
-
-        if not isinstance(search_artist, str) or not isinstance(search_song, str):
-            return JSONResponse(
-                status_code=500,
-                content={
-                    "err": True,
-                    "errorText": "Invalid request",
-                },
-            )
-
-        query = (
-            db.query(
-                Tracks.id.label("id"),
-                Tracks.artist_name.label("artist"),
-                Tracks.name.label("song"),
-                Lyrics.plain_lyrics.label("plainLyrics"),
-                Lyrics.synced_lyrics.label("syncedLyrics"),
-            )
-            .join(Lyrics, Tracks.id == Lyrics.track_id)
-            .filter(
-                and_(
-                    Tracks.artist_name_lower == search_artist,
-                    Tracks.name == search_song,
-                    Tracks.duration == search_duration if search_duration else true(),
-                )
-            )
-        )
-
-        db_result = query.first()
-        if not db_result:
-            return JSONResponse(
-                status_code=404, content={"err": True, "errorText": "No result found."}
-            )
-
-        result = LRCLibResult(
-            id=db_result.id,
-            artist=db_result.artist,
-            song=db_result.song,
-            plainLyrics=db_result.plainLyrics,
-            syncedLyrics=db_result.syncedLyrics,
-        )
-
-        return JSONResponse(content=vars(result))
@@ -1,5 +1,5 @@
 from dataclasses import dataclass
-from typing import Union, Optional
+from typing import Union


 @dataclass
@@ -22,25 +22,6 @@ class LyricsResult:
     time: float = 0.00


-@dataclass
-class LRCLibResult:
-    """
-    Class for returned Lyrics Results
-    Attributes:
-        id (int): returned id
-        artist (str): returned artist
-        song (str): returned song
-        plainLyrics (str): returned (plain) lyrics
-        syncedLyrics (str): returned synchronizedLyrics
-    """
-
-    id: int
-    artist: str
-    song: str
-    plainLyrics: Optional[str] = None
-    syncedLyrics: Optional[str] = None
-
-
 """
 Generic
 """
lyric_search/models.py (new file, 110 lines)

@@ -0,0 +1,110 @@
+"""
+Database models for LRCLib lyrics cache.
+"""
+import os
+import urllib.parse
+from typing import Type, AsyncGenerator
+from sqlalchemy import (
+    Column,
+    Integer,
+    String,
+    Float,
+    Boolean,
+    DateTime,
+    ForeignKey,
+    UniqueConstraint,
+)
+from sqlalchemy.orm import relationship, foreign
+from sqlalchemy.ext.declarative import declarative_base, DeclarativeMeta
+from sqlalchemy.ext.asyncio import AsyncEngine, create_async_engine, AsyncSession
+from sqlalchemy.ext.asyncio import async_sessionmaker
+
+Base: Type[DeclarativeMeta] = declarative_base()
+
+
+class Tracks(Base):  # type: ignore
+    """Tracks table - stores track metadata."""
+    __tablename__ = "tracks"
+
+    id = Column(Integer, primary_key=True, autoincrement=True)
+    name = Column(String, index=True)
+    name_lower = Column(String, index=True)
+    artist_name = Column(String, index=True)
+    artist_name_lower = Column(String, index=True)
+    album_name = Column(String)
+    album_name_lower = Column(String, index=True)
+    duration = Column(Float, index=True)
+    last_lyrics_id = Column(Integer, ForeignKey("lyrics.id"), index=True)
+    created_at = Column(DateTime)
+    updated_at = Column(DateTime)
+
+    # Relationships
+    lyrics = relationship(
+        "Lyrics",
+        back_populates="track",
+        foreign_keys=[last_lyrics_id],
+        primaryjoin="Tracks.id == foreign(Lyrics.track_id)",
+    )
+
+    # Constraints
+    __table_args__ = (
+        UniqueConstraint(
+            "name_lower",
+            "artist_name_lower",
+            "album_name_lower",
+            "duration",
+            name="uq_tracks",
+        ),
+    )
+
+
+class Lyrics(Base):  # type: ignore
+    """Lyrics table - stores lyrics content."""
+    __tablename__ = "lyrics"
+
+    id = Column(Integer, primary_key=True, autoincrement=True)
+    plain_lyrics = Column(String)
+    synced_lyrics = Column(String)
+    track_id = Column(Integer, ForeignKey("tracks.id"), index=True)
+    has_plain_lyrics = Column(Boolean, index=True)
+    has_synced_lyrics = Column(Boolean, index=True)
+    instrumental = Column(Boolean)
+    source = Column(String, index=True)
+    created_at = Column(DateTime, index=True)
+    updated_at = Column(DateTime)
+
+    # Relationships
+    track = relationship(
+        "Tracks",
+        back_populates="lyrics",
+        foreign_keys=[track_id],
+        primaryjoin=(Tracks.id == foreign(track_id)),
+        remote_side=Tracks.id,
+    )
+
+
+# PostgreSQL connection - using environment variables
+POSTGRES_HOST = os.getenv("POSTGRES_HOST", "localhost")
+POSTGRES_PORT = os.getenv("POSTGRES_PORT", "5432")
+POSTGRES_DB = os.getenv("POSTGRES_DB", "lrclib")
+POSTGRES_USER = os.getenv("POSTGRES_USER", "api")
+POSTGRES_PASSWORD = os.getenv("POSTGRES_PASSWORD", "")
+
+# URL-encode the password to handle special characters
+encoded_password = urllib.parse.quote_plus(POSTGRES_PASSWORD)
+
+DATABASE_URL: str = f"postgresql+asyncpg://{POSTGRES_USER}:{encoded_password}@{POSTGRES_HOST}:{POSTGRES_PORT}/{POSTGRES_DB}"
+async_engine: AsyncEngine = create_async_engine(
+    DATABASE_URL,
+    pool_size=20,
+    max_overflow=10,
+    pool_pre_ping=True,
+    echo=False
+)
+AsyncSessionLocal = async_sessionmaker(bind=async_engine, expire_on_commit=False)
+
+
+async def get_async_db():
+    """Get async database session."""
+    async with AsyncSessionLocal() as session:
+        yield session
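
The connection settings above come from the environment, with the defaults shown in the diff (localhost:5432, database lrclib, user api, empty password). A short wiring sketch; the health-check route and the literal values here are illustrative, not part of this commit:

    import os

    # Assumed values for illustration; models.py quote_plus-encodes the password itself.
    # The environment must be set before importing the module, since the engine is
    # created at import time.
    os.environ.setdefault("POSTGRES_HOST", "localhost")
    os.environ.setdefault("POSTGRES_PASSWORD", "change-me")

    from fastapi import Depends, FastAPI
    from sqlalchemy import text
    from sqlalchemy.ext.asyncio import AsyncSession

    from lyric_search.models import get_async_db

    app = FastAPI()


    @app.get("/health/db")  # hypothetical route, not in this commit
    async def db_health(db: AsyncSession = Depends(get_async_db)) -> dict:
        await db.execute(text("SELECT 1"))
        return {"ok": True}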
@@ -14,9 +14,7 @@ class Aggregate:
     Aggregate all source methods
     """

-    def __init__(self, exclude_methods=None) -> None:
-        if not exclude_methods:
-            exclude_methods: list = []
+    def __init__(self, exclude_methods: list = []) -> None:
         self.exclude_methods = exclude_methods
         self.redis_cache = redis_cache.RedisCache()
         self.notifier = notifier.DiscordNotifier()

@@ -70,14 +68,14 @@ class Aggregate:
         if plain:  # do not record LRC fails
             try:
                 await self.redis_cache.increment_found_count("failed")
-                self.notifier.send(
+                await self.notifier.send(
                     "WARNING",
                     f"Could not find {artist} - {song} via queried sources.",
                 )
             except Exception as e:
                 traceback.print_exc()
                 logging.info("Could not increment redis failed counter: %s", str(e))
-                self.notifier.send(
+                await self.notifier.send(
                     f"ERROR @ {__file__.rsplit('/', maxsplit=1)[-1]}",
                     f"Could not increment redis failed counter: {str(e)}",
                 )
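
The two await additions matter because a coroutine called without await only creates a coroutine object; the notification itself never runs. A generic illustration (stand-in coroutine, not the project's notifier):

    import asyncio


    async def send(level: str, message: str) -> None:
        print(f"[{level}] {message}")


    async def main() -> None:
        send("WARNING", "never delivered")   # RuntimeWarning: coroutine 'send' was never awaited
        await send("WARNING", "delivered")   # actually runs


    asyncio.run(main())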
@@ -45,11 +45,11 @@ class Genius:
            Optional[LyricsResult]: The result, if found - None otherwise.
        """
        try:
-            artist = artist.strip().lower()
-            song = song.strip().lower()
+            artist_name = artist.strip().lower()
+            song_name = song.strip().lower()
             time_start: float = time.time()
-            logging.info("Searching %s - %s on %s", artist, song, self.label)
-            search_term: str = f"{artist}%20{song}"
+            logging.info("Searching %s - %s on %s", artist_name, song_name, self.label)
+            search_term: str = f"{artist_name}%20{song_name}"
             returned_lyrics: str = ""
             async with ClientSession() as client:
                 async with client.get(

@@ -100,10 +100,13 @@ class Genius:
                     )
                     for returned in possible_matches
                 ]
-                searched: str = f"{artist} - {song}"
-                best_match: tuple = self.matcher.find_best_match(
+                searched: str = f"{artist_name} - {song_name}"
+                best_match: Optional[tuple] = self.matcher.find_best_match(
                     input_track=searched, candidate_tracks=to_scrape
                 )
+                if not best_match:
+                    raise InvalidGeniusResponseException("No matching result")
+
                 logging.info("To scrape: %s", to_scrape)
                 ((scrape_stub, track), confidence) = best_match
                 scrape_url: str = f"{self.genius_url}{scrape_stub[1:]}"

@@ -157,8 +160,8 @@
                     returned_lyrics: str = self.datautils.scrub_lyrics(
                         returned_lyrics
                     )
-                    artist: str = track.split(" - ", maxsplit=1)[0]
-                    song: str = track.split(" - ", maxsplit=1)[1]
+                    artist = track.split(" - ", maxsplit=1)[0]
+                    song = track.split(" - ", maxsplit=1)[1]
                     logging.info("Result found on %s", self.label)
                     time_end: float = time.time()
                     time_diff: float = time_end - time_start
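
find_best_match can return None when no candidate clears its threshold, which is why the new guard raises before the tuple is unpacked. A compact illustration of the failure mode the guard prevents (generic stand-in, not the project's TrackMatcher):

    from typing import Optional


    def find_best_match(input_track: str, candidates: list) -> Optional[tuple]:
        # Stand-in matcher: nothing to match against, so return None.
        return None if not candidates else (candidates[0], 100.0)


    best_match = find_best_match("some artist - some song", [])
    if not best_match:
        print("guarded: no matching result")  # the real code raises InvalidGeniusResponseException here
    else:
        # Unpacking None directly would raise TypeError: cannot unpack non-iterable NoneType
        ((scrape_stub, track), confidence) = best_match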
@@ -1,33 +1,25 @@
 import time
-
-import traceback
 import logging
-from typing import Optional, Union
-from aiohttp import ClientTimeout, ClientSession
-from tenacity import retry, stop_after_attempt, wait_fixed
+from typing import Optional
+from sqlalchemy.future import select
 from lyric_search import utils
 from lyric_search.constructors import LyricsResult
-from . import common, cache, redis_cache
-from lyric_search.constructors import InvalidLRCLibResponseException
+from lyric_search.models import Tracks, Lyrics, AsyncSessionLocal
+from . import redis_cache

 logger = logging.getLogger()
 log_level = logging.getLevelName(logger.level)


 class LRCLib:
-    """LRCLib Search Module"""
+    """LRCLib Search Module - Local PostgreSQL Database"""

     def __init__(self) -> None:
-        self.label: str = "LRCLib"
-        self.lrclib_url: str = "https://lrclib.net/api/search"
-        self.headers: dict = common.SCRAPE_HEADERS
-        self.timeout = ClientTimeout(connect=3, sock_read=8)
+        self.label: str = "LRCLib-Cache"
         self.datautils = utils.DataUtils()
         self.matcher = utils.TrackMatcher()
-        self.cache = cache.Cache()
         self.redis_cache = redis_cache.RedisCache()

-    @retry(stop=stop_after_attempt(2), wait=wait_fixed(0.5))
     async def search(
         self,
         artist: str,

@@ -36,10 +28,12 @@ class LRCLib:
         duration: Optional[int] = None,
     ) -> Optional[LyricsResult]:
         """
-        LRCLib Search
+        LRCLib Local Database Search
         Args:
             artist (str): the artist to search
             song (str): the song to search
+            plain (bool): return plain lyrics (True) or synced lyrics (False)
+            duration (int): optional track duration for better matching
         Returns:
             Optional[LyricsResult]: The result, if found - None otherwise.
         """

@@ -47,140 +41,115 @@ class LRCLib:
             artist = artist.strip().lower()
             song = song.strip().lower()
             time_start: float = time.time()
-            lrc_obj: Optional[list[dict]] = None
-
-            logging.info("Searching %s - %s on %s", artist, song, self.label)
-
-            input_track: str = f"{artist} - {song}"
-            returned_lyrics: str = ""
-            async with ClientSession() as client:
-                async with await client.get(
-                    self.lrclib_url,
-                    params={
-                        "artist_name": artist,
-                        "track_name": song,
-                        **({"duration": duration} if duration else {}),
-                    },
-                    timeout=self.timeout,
-                    headers=self.headers,
-                ) as request:
-                    request.raise_for_status()
-
-                    text: Optional[str] = await request.text()
-                    if not text:
-                        raise InvalidLRCLibResponseException("No search response.")
-                    if len(text) < 100:
-                        raise InvalidLRCLibResponseException(
-                            "Search response text was invalid (len < 100 chars.)"
-                        )
-
-                    search_data: Optional[Union[list, dict]] = await request.json()
-                    if not isinstance(search_data, list | dict):
-                        raise InvalidLRCLibResponseException("No JSON search data.")
-
-                    # logging.info("Search Data:\n%s", search_data)
-
-                    if not isinstance(search_data, list):
-                        raise InvalidLRCLibResponseException("Invalid JSON.")
-
-                    # Filter by duration if provided
-                    if duration:
-                        search_data = [
-                            r
-                            for r in search_data
-                            if abs(r.get("duration", 0) - duration) <= 10
-                        ]
-
-                    if plain:
-                        possible_matches = [
-                            (
-                                x,
-                                f"{result.get('artistName')} - {result.get('trackName')}",
-                            )
-                            for x, result in enumerate(search_data)
-                        ]
-                    else:
-                        logging.info(
-                            "Limiting possible matches to only those with non-null syncedLyrics"
-                        )
-                        possible_matches = [
-                            (
-                                x,
-                                f"{result.get('artistName')} - {result.get('trackName')}",
-                            )
-                            for x, result in enumerate(search_data)
-                            if isinstance(result["syncedLyrics"], str)
-                        ]
-
-                    best_match = None
-                    try:
-                        match_result = self.matcher.find_best_match(
-                            input_track,
-                            possible_matches,  # type: ignore
-                        )
-                        if match_result:
-                            best_match = match_result[0]
-                    except:  # noqa
-                        pass
-
-                    if not best_match:
-                        return
-                    best_match_id = best_match[0]
-
-                    if not isinstance(search_data[best_match_id]["artistName"], str):
-                        raise InvalidLRCLibResponseException(
-                            f"Invalid JSON: Cannot find artistName key.\n{search_data}"
-                        )
-
-                    if not isinstance(search_data[best_match_id]["trackName"], str):
-                        raise InvalidLRCLibResponseException(
-                            f"Invalid JSON: Cannot find trackName key.\n{search_data}"
-                        )
-
-                    returned_artist: str = search_data[best_match_id]["artistName"]
-                    returned_song: str = search_data[best_match_id]["trackName"]
-                    if plain:
-                        if not isinstance(
-                            search_data[best_match_id]["plainLyrics"], str
-                        ):
-                            raise InvalidLRCLibResponseException(
-                                f"Invalid JSON: Cannot find plainLyrics key.\n{search_data}"
-                            )
-                        returned_lyrics: str = search_data[best_match_id]["plainLyrics"]
-                        returned_lyrics = self.datautils.scrub_lyrics(returned_lyrics)
-                    else:
-                        if not isinstance(
-                            search_data[best_match_id]["syncedLyrics"], str
-                        ):
-                            raise InvalidLRCLibResponseException(
-                                f"Invalid JSON: Cannot find syncedLyrics key.\n{search_data}"
-                            )
-                        returned_lyrics: str = search_data[best_match_id][
-                            "syncedLyrics"
-                        ]
-                        lrc_obj = self.datautils.create_lrc_object(returned_lyrics)
-                    returned_track: str = f"{returned_artist} - {returned_song}"
-                    match_result = self.matcher.find_best_match(
-                        input_track=input_track, candidate_tracks=[(0, returned_track)]
-                    )
-                    if not match_result:
-                        return  # No suitable match found
-                    _matched, confidence = match_result
-                    logging.info("Result found on %s", self.label)
-                    time_end: float = time.time()
-                    time_diff: float = time_end - time_start
-                    matched = LyricsResult(
-                        artist=returned_artist,
-                        song=returned_song,
-                        src=self.label,
-                        lyrics=returned_lyrics if plain else lrc_obj,  # type: ignore
-                        confidence=confidence,
-                        time=time_diff,
-                    )
-                    await self.redis_cache.increment_found_count(self.label)
-                    if plain:
-                        await self.cache.store(matched)
-                    return matched
+
+            logging.info("Searching %s - %s on %s", artist, song, self.label)
+
+            async with AsyncSessionLocal() as db:
+                best_match = None
+
+                # Try exact match first (fastest)
+                result = await db.execute(
+                    select(
+                        Tracks.artist_name,
+                        Tracks.name,
+                        Lyrics.plain_lyrics,
+                        Lyrics.synced_lyrics,
+                    )
+                    .join(Lyrics, Tracks.id == Lyrics.track_id)
+                    .filter(
+                        Tracks.artist_name_lower == artist,
+                        Tracks.name_lower == song,
+                    )
+                    .limit(1)
+                )
+                best_match = result.first()
+
+                # If no exact match, try prefix match (faster than full ILIKE)
+                if not best_match:
+                    result = await db.execute(
+                        select(
+                            Tracks.artist_name,
+                            Tracks.name,
+                            Lyrics.plain_lyrics,
+                            Lyrics.synced_lyrics,
+                        )
+                        .join(Lyrics, Tracks.id == Lyrics.track_id)
+                        .filter(
+                            Tracks.artist_name_lower.like(f"{artist}%"),
+                            Tracks.name_lower.like(f"{song}%"),
+                        )
+                        .limit(1)
+                    )
+                    best_match = result.first()
+
+                # If still no match, try full ILIKE (slowest)
+                if not best_match:
+                    result = await db.execute(
+                        select(
+                            Tracks.artist_name,
+                            Tracks.name,
+                            Lyrics.plain_lyrics,
+                            Lyrics.synced_lyrics,
+                        )
+                        .join(Lyrics, Tracks.id == Lyrics.track_id)
+                        .filter(
+                            Tracks.artist_name_lower.ilike(f"%{artist}%"),
+                            Tracks.name_lower.ilike(f"%{song}%"),
+                        )
+                        .limit(1)
+                    )
+                    best_match = result.first()
+
+                if not best_match:
+                    logging.info("No result found on %s", self.label)
+                    return None
+
+                returned_artist = best_match.artist_name
+                returned_song = best_match.name
+
+                if plain:
+                    if not best_match.plain_lyrics:
+                        logging.info("No plain lyrics available on %s", self.label)
+                        return None
+                    returned_lyrics = best_match.plain_lyrics
+                    returned_lyrics = self.datautils.scrub_lyrics(returned_lyrics)
+                    lrc_obj = None
+                else:
+                    if not best_match.synced_lyrics:
+                        logging.info("No synced lyrics available on %s", self.label)
+                        return None
+                    returned_lyrics = best_match.synced_lyrics
+                    lrc_obj = self.datautils.create_lrc_object(returned_lyrics)
+
+                # Calculate match confidence
+                input_track = f"{artist} - {song}"
+                returned_track = f"{returned_artist} - {returned_song}"
+                match_result = self.matcher.find_best_match(
+                    input_track=input_track,
+                    candidate_tracks=[(0, returned_track)]
+                )
+
+                if not match_result:
+                    return None
+
+                _matched, confidence = match_result
+
+                logging.info("Result found on %s", self.label)
+                time_end = time.time()
+                time_diff = time_end - time_start
+
+                matched = LyricsResult(
+                    artist=returned_artist,
+                    song=returned_song,
+                    src=self.label,
+                    lyrics=returned_lyrics if plain else lrc_obj,  # type: ignore
+                    confidence=confidence,
+                    time=time_diff,
+                )
+
+                await self.redis_cache.increment_found_count(self.label)
+                return matched
+
         except Exception as e:
-            logging.debug("Exception: %s", str(e))
-            traceback.print_exc()
+            logging.error("Exception in %s: %s", self.label, str(e))
+            return None
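
The rewritten search walks three tiers, exact equality on the *_lower columns, then a prefix LIKE, then a wildcard ILIKE, following the fastest-to-slowest ordering noted in the comments above. One way to sanity-check that ordering against the imported data is to compare query plans; a rough sketch reusing the async engine from lyric_search/models.py (table and column names are from this diff, the EXPLAIN wrapper and literal value are illustrative only):

    import asyncio

    from sqlalchemy import text

    from lyric_search.models import async_engine

    # Literal value inlined for illustration only.
    QUERIES = {
        "exact":  "EXPLAIN SELECT 1 FROM tracks WHERE artist_name_lower = 'metallica' LIMIT 1",
        "prefix": "EXPLAIN SELECT 1 FROM tracks WHERE artist_name_lower LIKE 'metallica%' LIMIT 1",
        "ilike":  "EXPLAIN SELECT 1 FROM tracks WHERE artist_name_lower ILIKE '%metallica%' LIMIT 1",
    }


    async def explain_all() -> None:
        async with async_engine.connect() as conn:
            for name, sql in QUERIES.items():
                rows = await conn.execute(text(sql))
                print(f"-- {name}")
                for (line,) in rows:
                    print(line)


    asyncio.run(explain_all())

Whether the prefix tier can actually use a btree index depends on the database collation (text_pattern_ops or a C-collated index), so the plans are worth checking once the pgloader import has run.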
pgloader_config.load (new file, 0 lines shown)
postgres_schema.sql (new file, 0 lines shown)