- Implemented a new script to upsert data from SQLite dumps into PostgreSQL asynchronously.
- Added detailed reporting for each table processed, including success and error statuses.
- Integrated Discord notifications for various stages of the upsert process.
- Included functionality to fetch the latest SQLite dump from a specified URL.
- Added error handling and state management for the upsert process.

Resolves #34
This commit is contained in:
@@ -245,9 +245,9 @@ class LyricSearch(FastAPI):
|
||||
if i + line_count <= len(lyric_lines):
|
||||
# Combine consecutive lines with space separator
|
||||
combined_lines = []
|
||||
line_positions: list[
|
||||
tuple[int, int]
|
||||
] = [] # Track where each line starts in combined text
|
||||
line_positions: list[tuple[int, int]] = (
|
||||
[]
|
||||
) # Track where each line starts in combined text
|
||||
combined_text_parts: list[str] = []
|
||||
|
||||
for j in range(line_count):
|
||||
|
||||
@@ -99,7 +99,9 @@ POSTGRES_PASSWORD = os.getenv("POSTGRES_PASSWORD", "")
|
||||
# URL-encode the password to handle special characters
|
||||
encoded_password = urllib.parse.quote_plus(POSTGRES_PASSWORD)
|
||||
|
||||
DATABASE_URL: str = f"postgresql+asyncpg://{POSTGRES_USER}:{encoded_password}@{POSTGRES_HOST}:{POSTGRES_PORT}/{POSTGRES_DB}"
|
||||
DATABASE_URL: str = (
|
||||
f"postgresql+asyncpg://{POSTGRES_USER}:{encoded_password}@{POSTGRES_HOST}:{POSTGRES_PORT}/{POSTGRES_DB}"
|
||||
)
|
||||
async_engine: AsyncEngine = create_async_engine(
|
||||
DATABASE_URL, pool_size=20, max_overflow=10, pool_pre_ping=True, echo=False
|
||||
)
|
||||
|
||||
@@ -91,8 +91,10 @@ class Cache:
|
||||
logging.debug(
|
||||
"Checking whether %s is already stored", artistsong.replace("\n", " - ")
|
||||
)
|
||||
check_query: str = 'SELECT id, artist, song FROM lyrics WHERE editdist3((lower(artist) || " " || lower(song)), (? || " " || ?))\
|
||||
check_query: str = (
|
||||
'SELECT id, artist, song FROM lyrics WHERE editdist3((lower(artist) || " " || lower(song)), (? || " " || ?))\
|
||||
<= 410 ORDER BY editdist3((lower(artist) || " " || lower(song)), ?) ASC LIMIT 1'
|
||||
)
|
||||
artistsong_split = artistsong.split("\n", maxsplit=1)
|
||||
artist = artistsong_split[0].lower()
|
||||
song = artistsong_split[1].lower()
|
||||
@@ -213,10 +215,8 @@ class Cache:
|
||||
lyrics = regex.sub(r"(<br>|\n|\r\n)", " / ", lyr_result.lyrics.strip())
|
||||
lyrics = regex.sub(r"\s{2,}", " ", lyrics)
|
||||
|
||||
insert_query = (
|
||||
"INSERT INTO lyrics (src, date_retrieved, artist, song, artistsong, confidence, lyrics)\
|
||||
insert_query = "INSERT INTO lyrics (src, date_retrieved, artist, song, artistsong, confidence, lyrics)\
|
||||
VALUES(?, ?, ?, ?, ?, ?, ?)"
|
||||
)
|
||||
params = (
|
||||
lyr_result.src,
|
||||
time.time(),
|
||||
@@ -260,8 +260,10 @@ class Cache:
|
||||
|
||||
if artist == "!" and song == "!":
|
||||
random_search = True
|
||||
search_query: str = "SELECT id, artist, song, lyrics, src, confidence\
|
||||
search_query: str = (
|
||||
"SELECT id, artist, song, lyrics, src, confidence\
|
||||
FROM lyrics ORDER BY RANDOM() LIMIT 1"
|
||||
)
|
||||
|
||||
logging.info("Searching %s - %s on %s", artist, song, self.label)
|
||||
|
||||
@@ -320,9 +322,11 @@ class Cache:
|
||||
self.cache_pre_query
|
||||
) as _db_cursor:
|
||||
if not random_search:
|
||||
search_query: str = 'SELECT id, artist, song, lyrics, src, confidence FROM lyrics\
|
||||
search_query: str = (
|
||||
'SELECT id, artist, song, lyrics, src, confidence FROM lyrics\
|
||||
WHERE editdist3((lower(artist) || " " || lower(song)), (? || " " || ?))\
|
||||
<= 410 ORDER BY editdist3((lower(artist) || " " || lower(song)), ?) ASC LIMIT 10'
|
||||
)
|
||||
search_params: tuple = (
|
||||
artist.strip(),
|
||||
song.strip(),
|
||||
|
||||
@@ -111,9 +111,8 @@ class DataUtils:
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self.lrc_regex = (
|
||||
regex.compile( # capture mm:ss and optional .xxx, then the lyric text
|
||||
r"""
|
||||
self.lrc_regex = regex.compile( # capture mm:ss and optional .xxx, then the lyric text
|
||||
r"""
|
||||
\[ # literal “[”
|
||||
( # 1st (and only) capture group:
|
||||
[0-9]{2} # two-digit minutes
|
||||
@@ -124,8 +123,7 @@ class DataUtils:
|
||||
\s* # optional whitespace
|
||||
(.*) # capture the rest of the line as words
|
||||
""",
|
||||
regex.VERBOSE,
|
||||
)
|
||||
regex.VERBOSE,
|
||||
)
|
||||
self.scrub_regex_1: Pattern = regex.compile(r"(\[.*?\])(\s){0,}(\:){0,1}")
|
||||
self.scrub_regex_2: Pattern = regex.compile(
|
||||
|
||||
@@ -92,11 +92,7 @@ def get_redis_sync_client(decode_responses: bool = True) -> redis_sync.Redis:
|
||||
|
||||
async def close_redis_pools() -> None:
|
||||
"""Close Redis connections. Call on app shutdown."""
|
||||
global \
|
||||
_redis_async_pool, \
|
||||
_redis_async_client, \
|
||||
_redis_sync_client, \
|
||||
_redis_sync_client_decoded
|
||||
global _redis_async_pool, _redis_async_client, _redis_sync_client, _redis_sync_client_decoded
|
||||
|
||||
if _redis_async_client:
|
||||
await _redis_async_client.close()
|
||||
|
||||
2103
update_lrclib_db.py
Normal file
2103
update_lrclib_db.py
Normal file
File diff suppressed because it is too large
Load Diff
@@ -127,7 +127,9 @@ class MemeUtil:
|
||||
db_conn.row_factory = sqlite3.Row
|
||||
rows_per_page: int = 10
|
||||
offset: int = (page - 1) * rows_per_page
|
||||
query: str = "SELECT id, timestamp FROM memes ORDER BY timestamp DESC LIMIT 10 OFFSET ?"
|
||||
query: str = (
|
||||
"SELECT id, timestamp FROM memes ORDER BY timestamp DESC LIMIT 10 OFFSET ?"
|
||||
)
|
||||
async with await db_conn.execute(query, (offset,)) as db_cursor:
|
||||
results = await db_cursor.fetchall()
|
||||
for result in results:
|
||||
|
||||
@@ -1269,9 +1269,11 @@ class SRUtil:
|
||||
album,
|
||||
song,
|
||||
selected_artist,
|
||||
best_track.get("album", {}).get("title")
|
||||
if best_track.get("album")
|
||||
else None,
|
||||
(
|
||||
best_track.get("album", {}).get("title")
|
||||
if best_track.get("album")
|
||||
else None
|
||||
),
|
||||
selected_title,
|
||||
):
|
||||
# Try to find another candidate that does match metadata
|
||||
@@ -1294,9 +1296,11 @@ class SRUtil:
|
||||
album,
|
||||
song,
|
||||
cand_artist,
|
||||
candidate.get("album", {}).get("title")
|
||||
if candidate.get("album")
|
||||
else None,
|
||||
(
|
||||
candidate.get("album", {}).get("title")
|
||||
if candidate.get("album")
|
||||
else None
|
||||
),
|
||||
cand_title,
|
||||
):
|
||||
found_better = candidate
|
||||
|
||||
Reference in New Issue
Block a user