- Implemented a new script to upsert data from SQLite dumps into PostgreSQL asynchronously.
- Added detailed reporting for each table processed, including success and error statuses.
- Integrated Discord notifications for various stages of the upsert process.
- Included functionality to fetch the latest SQLite dump from a specified URL.
- Added error handling and state management for the upsert process.

Resolves #34
This commit is contained in:
@@ -245,9 +245,9 @@ class LyricSearch(FastAPI):
|
|||||||
if i + line_count <= len(lyric_lines):
|
if i + line_count <= len(lyric_lines):
|
||||||
# Combine consecutive lines with space separator
|
# Combine consecutive lines with space separator
|
||||||
combined_lines = []
|
combined_lines = []
|
||||||
line_positions: list[
|
line_positions: list[tuple[int, int]] = (
|
||||||
tuple[int, int]
|
[]
|
||||||
] = [] # Track where each line starts in combined text
|
) # Track where each line starts in combined text
|
||||||
combined_text_parts: list[str] = []
|
combined_text_parts: list[str] = []
|
||||||
|
|
||||||
for j in range(line_count):
|
for j in range(line_count):
|
||||||
|
|||||||
@@ -99,7 +99,9 @@ POSTGRES_PASSWORD = os.getenv("POSTGRES_PASSWORD", "")
|
|||||||
# URL-encode the password to handle special characters
|
# URL-encode the password to handle special characters
|
||||||
encoded_password = urllib.parse.quote_plus(POSTGRES_PASSWORD)
|
encoded_password = urllib.parse.quote_plus(POSTGRES_PASSWORD)
|
||||||
|
|
||||||
DATABASE_URL: str = f"postgresql+asyncpg://{POSTGRES_USER}:{encoded_password}@{POSTGRES_HOST}:{POSTGRES_PORT}/{POSTGRES_DB}"
|
DATABASE_URL: str = (
|
||||||
|
f"postgresql+asyncpg://{POSTGRES_USER}:{encoded_password}@{POSTGRES_HOST}:{POSTGRES_PORT}/{POSTGRES_DB}"
|
||||||
|
)
|
||||||
async_engine: AsyncEngine = create_async_engine(
|
async_engine: AsyncEngine = create_async_engine(
|
||||||
DATABASE_URL, pool_size=20, max_overflow=10, pool_pre_ping=True, echo=False
|
DATABASE_URL, pool_size=20, max_overflow=10, pool_pre_ping=True, echo=False
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -91,8 +91,10 @@ class Cache:
|
|||||||
logging.debug(
|
logging.debug(
|
||||||
"Checking whether %s is already stored", artistsong.replace("\n", " - ")
|
"Checking whether %s is already stored", artistsong.replace("\n", " - ")
|
||||||
)
|
)
|
||||||
check_query: str = 'SELECT id, artist, song FROM lyrics WHERE editdist3((lower(artist) || " " || lower(song)), (? || " " || ?))\
|
check_query: str = (
|
||||||
|
'SELECT id, artist, song FROM lyrics WHERE editdist3((lower(artist) || " " || lower(song)), (? || " " || ?))\
|
||||||
<= 410 ORDER BY editdist3((lower(artist) || " " || lower(song)), ?) ASC LIMIT 1'
|
<= 410 ORDER BY editdist3((lower(artist) || " " || lower(song)), ?) ASC LIMIT 1'
|
||||||
|
)
|
||||||
artistsong_split = artistsong.split("\n", maxsplit=1)
|
artistsong_split = artistsong.split("\n", maxsplit=1)
|
||||||
artist = artistsong_split[0].lower()
|
artist = artistsong_split[0].lower()
|
||||||
song = artistsong_split[1].lower()
|
song = artistsong_split[1].lower()
|
||||||
@@ -213,10 +215,8 @@ class Cache:
|
|||||||
lyrics = regex.sub(r"(<br>|\n|\r\n)", " / ", lyr_result.lyrics.strip())
|
lyrics = regex.sub(r"(<br>|\n|\r\n)", " / ", lyr_result.lyrics.strip())
|
||||||
lyrics = regex.sub(r"\s{2,}", " ", lyrics)
|
lyrics = regex.sub(r"\s{2,}", " ", lyrics)
|
||||||
|
|
||||||
insert_query = (
|
insert_query = "INSERT INTO lyrics (src, date_retrieved, artist, song, artistsong, confidence, lyrics)\
|
||||||
"INSERT INTO lyrics (src, date_retrieved, artist, song, artistsong, confidence, lyrics)\
|
|
||||||
VALUES(?, ?, ?, ?, ?, ?, ?)"
|
VALUES(?, ?, ?, ?, ?, ?, ?)"
|
||||||
)
|
|
||||||
params = (
|
params = (
|
||||||
lyr_result.src,
|
lyr_result.src,
|
||||||
time.time(),
|
time.time(),
|
||||||
@@ -260,8 +260,10 @@ class Cache:
|
|||||||
|
|
||||||
if artist == "!" and song == "!":
|
if artist == "!" and song == "!":
|
||||||
random_search = True
|
random_search = True
|
||||||
search_query: str = "SELECT id, artist, song, lyrics, src, confidence\
|
search_query: str = (
|
||||||
|
"SELECT id, artist, song, lyrics, src, confidence\
|
||||||
FROM lyrics ORDER BY RANDOM() LIMIT 1"
|
FROM lyrics ORDER BY RANDOM() LIMIT 1"
|
||||||
|
)
|
||||||
|
|
||||||
logging.info("Searching %s - %s on %s", artist, song, self.label)
|
logging.info("Searching %s - %s on %s", artist, song, self.label)
|
||||||
|
|
||||||
@@ -320,9 +322,11 @@ class Cache:
|
|||||||
self.cache_pre_query
|
self.cache_pre_query
|
||||||
) as _db_cursor:
|
) as _db_cursor:
|
||||||
if not random_search:
|
if not random_search:
|
||||||
search_query: str = 'SELECT id, artist, song, lyrics, src, confidence FROM lyrics\
|
search_query: str = (
|
||||||
|
'SELECT id, artist, song, lyrics, src, confidence FROM lyrics\
|
||||||
WHERE editdist3((lower(artist) || " " || lower(song)), (? || " " || ?))\
|
WHERE editdist3((lower(artist) || " " || lower(song)), (? || " " || ?))\
|
||||||
<= 410 ORDER BY editdist3((lower(artist) || " " || lower(song)), ?) ASC LIMIT 10'
|
<= 410 ORDER BY editdist3((lower(artist) || " " || lower(song)), ?) ASC LIMIT 10'
|
||||||
|
)
|
||||||
search_params: tuple = (
|
search_params: tuple = (
|
||||||
artist.strip(),
|
artist.strip(),
|
||||||
song.strip(),
|
song.strip(),
|
||||||
|
|||||||
@@ -111,8 +111,7 @@ class DataUtils:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self) -> None:
|
def __init__(self) -> None:
|
||||||
self.lrc_regex = (
|
self.lrc_regex = regex.compile( # capture mm:ss and optional .xxx, then the lyric text
|
||||||
regex.compile( # capture mm:ss and optional .xxx, then the lyric text
|
|
||||||
r"""
|
r"""
|
||||||
\[ # literal “[”
|
\[ # literal “[”
|
||||||
( # 1st (and only) capture group:
|
( # 1st (and only) capture group:
|
||||||
@@ -126,7 +125,6 @@ class DataUtils:
|
|||||||
""",
|
""",
|
||||||
regex.VERBOSE,
|
regex.VERBOSE,
|
||||||
)
|
)
|
||||||
)
|
|
||||||
self.scrub_regex_1: Pattern = regex.compile(r"(\[.*?\])(\s){0,}(\:){0,1}")
|
self.scrub_regex_1: Pattern = regex.compile(r"(\[.*?\])(\s){0,}(\:){0,1}")
|
||||||
self.scrub_regex_2: Pattern = regex.compile(
|
self.scrub_regex_2: Pattern = regex.compile(
|
||||||
r"(\d?)(Embed\b)", flags=regex.IGNORECASE
|
r"(\d?)(Embed\b)", flags=regex.IGNORECASE
|
||||||
|
|||||||
@@ -92,11 +92,7 @@ def get_redis_sync_client(decode_responses: bool = True) -> redis_sync.Redis:
|
|||||||
|
|
||||||
async def close_redis_pools() -> None:
|
async def close_redis_pools() -> None:
|
||||||
"""Close Redis connections. Call on app shutdown."""
|
"""Close Redis connections. Call on app shutdown."""
|
||||||
global \
|
global _redis_async_pool, _redis_async_client, _redis_sync_client, _redis_sync_client_decoded
|
||||||
_redis_async_pool, \
|
|
||||||
_redis_async_client, \
|
|
||||||
_redis_sync_client, \
|
|
||||||
_redis_sync_client_decoded
|
|
||||||
|
|
||||||
if _redis_async_client:
|
if _redis_async_client:
|
||||||
await _redis_async_client.close()
|
await _redis_async_client.close()
|
||||||
|
|||||||
2103
update_lrclib_db.py
Normal file
2103
update_lrclib_db.py
Normal file
File diff suppressed because it is too large
Load Diff
@@ -127,7 +127,9 @@ class MemeUtil:
|
|||||||
db_conn.row_factory = sqlite3.Row
|
db_conn.row_factory = sqlite3.Row
|
||||||
rows_per_page: int = 10
|
rows_per_page: int = 10
|
||||||
offset: int = (page - 1) * rows_per_page
|
offset: int = (page - 1) * rows_per_page
|
||||||
query: str = "SELECT id, timestamp FROM memes ORDER BY timestamp DESC LIMIT 10 OFFSET ?"
|
query: str = (
|
||||||
|
"SELECT id, timestamp FROM memes ORDER BY timestamp DESC LIMIT 10 OFFSET ?"
|
||||||
|
)
|
||||||
async with await db_conn.execute(query, (offset,)) as db_cursor:
|
async with await db_conn.execute(query, (offset,)) as db_cursor:
|
||||||
results = await db_cursor.fetchall()
|
results = await db_cursor.fetchall()
|
||||||
for result in results:
|
for result in results:
|
||||||
|
|||||||
@@ -1269,9 +1269,11 @@ class SRUtil:
|
|||||||
album,
|
album,
|
||||||
song,
|
song,
|
||||||
selected_artist,
|
selected_artist,
|
||||||
|
(
|
||||||
best_track.get("album", {}).get("title")
|
best_track.get("album", {}).get("title")
|
||||||
if best_track.get("album")
|
if best_track.get("album")
|
||||||
else None,
|
else None
|
||||||
|
),
|
||||||
selected_title,
|
selected_title,
|
||||||
):
|
):
|
||||||
# Try to find another candidate that does match metadata
|
# Try to find another candidate that does match metadata
|
||||||
@@ -1294,9 +1296,11 @@ class SRUtil:
|
|||||||
album,
|
album,
|
||||||
song,
|
song,
|
||||||
cand_artist,
|
cand_artist,
|
||||||
|
(
|
||||||
candidate.get("album", {}).get("title")
|
candidate.get("album", {}).get("title")
|
||||||
if candidate.get("album")
|
if candidate.get("album")
|
||||||
else None,
|
else None
|
||||||
|
),
|
||||||
cand_title,
|
cand_title,
|
||||||
):
|
):
|
||||||
found_better = candidate
|
found_better = candidate
|
||||||
|
|||||||
Reference in New Issue
Block a user