- Implemented a new script that asynchronously upserts data from SQLite dumps into PostgreSQL.

- Added detailed reporting for each table processed, including success and error statuses.
- Integrated Discord notifications for various stages of the upsert process.
- Included functionality to fetch the latest SQLite dump from a specified URL.
- Added error handling and state management for the upsert process.

Resolves #34
This commit is contained in:
2026-01-25 22:06:24 -05:00
parent 97fd7dd67d
commit 277804d212
8 changed files with 2136 additions and 27 deletions

View File

@@ -245,9 +245,9 @@ class LyricSearch(FastAPI):
if i + line_count <= len(lyric_lines): if i + line_count <= len(lyric_lines):
# Combine consecutive lines with space separator # Combine consecutive lines with space separator
combined_lines = [] combined_lines = []
line_positions: list[ line_positions: list[tuple[int, int]] = (
tuple[int, int] []
] = [] # Track where each line starts in combined text ) # Track where each line starts in combined text
combined_text_parts: list[str] = [] combined_text_parts: list[str] = []
for j in range(line_count): for j in range(line_count):

View File

@@ -99,7 +99,9 @@ POSTGRES_PASSWORD = os.getenv("POSTGRES_PASSWORD", "")
# URL-encode the password to handle special characters # URL-encode the password to handle special characters
encoded_password = urllib.parse.quote_plus(POSTGRES_PASSWORD) encoded_password = urllib.parse.quote_plus(POSTGRES_PASSWORD)
DATABASE_URL: str = f"postgresql+asyncpg://{POSTGRES_USER}:{encoded_password}@{POSTGRES_HOST}:{POSTGRES_PORT}/{POSTGRES_DB}" DATABASE_URL: str = (
f"postgresql+asyncpg://{POSTGRES_USER}:{encoded_password}@{POSTGRES_HOST}:{POSTGRES_PORT}/{POSTGRES_DB}"
)
async_engine: AsyncEngine = create_async_engine( async_engine: AsyncEngine = create_async_engine(
DATABASE_URL, pool_size=20, max_overflow=10, pool_pre_ping=True, echo=False DATABASE_URL, pool_size=20, max_overflow=10, pool_pre_ping=True, echo=False
) )

View File

@@ -91,8 +91,10 @@ class Cache:
logging.debug( logging.debug(
"Checking whether %s is already stored", artistsong.replace("\n", " - ") "Checking whether %s is already stored", artistsong.replace("\n", " - ")
) )
check_query: str = 'SELECT id, artist, song FROM lyrics WHERE editdist3((lower(artist) || " " || lower(song)), (? || " " || ?))\ check_query: str = (
'SELECT id, artist, song FROM lyrics WHERE editdist3((lower(artist) || " " || lower(song)), (? || " " || ?))\
<= 410 ORDER BY editdist3((lower(artist) || " " || lower(song)), ?) ASC LIMIT 1' <= 410 ORDER BY editdist3((lower(artist) || " " || lower(song)), ?) ASC LIMIT 1'
)
artistsong_split = artistsong.split("\n", maxsplit=1) artistsong_split = artistsong.split("\n", maxsplit=1)
artist = artistsong_split[0].lower() artist = artistsong_split[0].lower()
song = artistsong_split[1].lower() song = artistsong_split[1].lower()
@@ -213,10 +215,8 @@ class Cache:
lyrics = regex.sub(r"(<br>|\n|\r\n)", " / ", lyr_result.lyrics.strip()) lyrics = regex.sub(r"(<br>|\n|\r\n)", " / ", lyr_result.lyrics.strip())
lyrics = regex.sub(r"\s{2,}", " ", lyrics) lyrics = regex.sub(r"\s{2,}", " ", lyrics)
insert_query = ( insert_query = "INSERT INTO lyrics (src, date_retrieved, artist, song, artistsong, confidence, lyrics)\
"INSERT INTO lyrics (src, date_retrieved, artist, song, artistsong, confidence, lyrics)\
VALUES(?, ?, ?, ?, ?, ?, ?)" VALUES(?, ?, ?, ?, ?, ?, ?)"
)
params = ( params = (
lyr_result.src, lyr_result.src,
time.time(), time.time(),
@@ -260,8 +260,10 @@ class Cache:
if artist == "!" and song == "!": if artist == "!" and song == "!":
random_search = True random_search = True
search_query: str = "SELECT id, artist, song, lyrics, src, confidence\ search_query: str = (
"SELECT id, artist, song, lyrics, src, confidence\
FROM lyrics ORDER BY RANDOM() LIMIT 1" FROM lyrics ORDER BY RANDOM() LIMIT 1"
)
logging.info("Searching %s - %s on %s", artist, song, self.label) logging.info("Searching %s - %s on %s", artist, song, self.label)
@@ -320,9 +322,11 @@ class Cache:
self.cache_pre_query self.cache_pre_query
) as _db_cursor: ) as _db_cursor:
if not random_search: if not random_search:
search_query: str = 'SELECT id, artist, song, lyrics, src, confidence FROM lyrics\ search_query: str = (
'SELECT id, artist, song, lyrics, src, confidence FROM lyrics\
WHERE editdist3((lower(artist) || " " || lower(song)), (? || " " || ?))\ WHERE editdist3((lower(artist) || " " || lower(song)), (? || " " || ?))\
<= 410 ORDER BY editdist3((lower(artist) || " " || lower(song)), ?) ASC LIMIT 10' <= 410 ORDER BY editdist3((lower(artist) || " " || lower(song)), ?) ASC LIMIT 10'
)
search_params: tuple = ( search_params: tuple = (
artist.strip(), artist.strip(),
song.strip(), song.strip(),

View File

@@ -111,9 +111,8 @@ class DataUtils:
""" """
def __init__(self) -> None: def __init__(self) -> None:
self.lrc_regex = ( self.lrc_regex = regex.compile( # capture mm:ss and optional .xxx, then the lyric text
regex.compile( # capture mm:ss and optional .xxx, then the lyric text r"""
r"""
\[ # literal “[” \[ # literal “[”
( # 1st (and only) capture group: ( # 1st (and only) capture group:
[0-9]{2} # two-digit minutes [0-9]{2} # two-digit minutes
@@ -124,8 +123,7 @@ class DataUtils:
\s* # optional whitespace \s* # optional whitespace
(.*) # capture the rest of the line as words (.*) # capture the rest of the line as words
""", """,
regex.VERBOSE, regex.VERBOSE,
)
) )
self.scrub_regex_1: Pattern = regex.compile(r"(\[.*?\])(\s){0,}(\:){0,1}") self.scrub_regex_1: Pattern = regex.compile(r"(\[.*?\])(\s){0,}(\:){0,1}")
self.scrub_regex_2: Pattern = regex.compile( self.scrub_regex_2: Pattern = regex.compile(

View File

@@ -92,11 +92,7 @@ def get_redis_sync_client(decode_responses: bool = True) -> redis_sync.Redis:
async def close_redis_pools() -> None: async def close_redis_pools() -> None:
"""Close Redis connections. Call on app shutdown.""" """Close Redis connections. Call on app shutdown."""
global \ global _redis_async_pool, _redis_async_client, _redis_sync_client, _redis_sync_client_decoded
_redis_async_pool, \
_redis_async_client, \
_redis_sync_client, \
_redis_sync_client_decoded
if _redis_async_client: if _redis_async_client:
await _redis_async_client.close() await _redis_async_client.close()

2103
update_lrclib_db.py Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -127,7 +127,9 @@ class MemeUtil:
db_conn.row_factory = sqlite3.Row db_conn.row_factory = sqlite3.Row
rows_per_page: int = 10 rows_per_page: int = 10
offset: int = (page - 1) * rows_per_page offset: int = (page - 1) * rows_per_page
query: str = "SELECT id, timestamp FROM memes ORDER BY timestamp DESC LIMIT 10 OFFSET ?" query: str = (
"SELECT id, timestamp FROM memes ORDER BY timestamp DESC LIMIT 10 OFFSET ?"
)
async with await db_conn.execute(query, (offset,)) as db_cursor: async with await db_conn.execute(query, (offset,)) as db_cursor:
results = await db_cursor.fetchall() results = await db_cursor.fetchall()
for result in results: for result in results:

View File

@@ -1269,9 +1269,11 @@ class SRUtil:
album, album,
song, song,
selected_artist, selected_artist,
best_track.get("album", {}).get("title") (
if best_track.get("album") best_track.get("album", {}).get("title")
else None, if best_track.get("album")
else None
),
selected_title, selected_title,
): ):
# Try to find another candidate that does match metadata # Try to find another candidate that does match metadata
@@ -1294,9 +1296,11 @@ class SRUtil:
album, album,
song, song,
cand_artist, cand_artist,
candidate.get("album", {}).get("title") (
if candidate.get("album") candidate.get("album", {}).get("title")
else None, if candidate.get("album")
else None
),
cand_title, cand_title,
): ):
found_better = candidate found_better = candidate