From c6d2bad79db7676a2c96dba8d9a0db96d99422fb Mon Sep 17 00:00:00 2001 From: codey Date: Fri, 24 Oct 2025 13:40:55 -0400 Subject: [PATCH] Enhance lyric search functionality by improving line splitting logic and adding multi-line matching for subsearch. Update cache handling to ensure confidence threshold is respected before returning results. --- endpoints/lyric_search.py | 63 ++++++++++++++++++++++++++++++++--- lyric_search/sources/cache.py | 6 +++- 2 files changed, 63 insertions(+), 6 deletions(-) diff --git a/endpoints/lyric_search.py b/endpoints/lyric_search.py index e43ef00..062826a 100644 --- a/endpoints/lyric_search.py +++ b/endpoints/lyric_search.py @@ -210,22 +210,77 @@ class LyricSearch(FastAPI): if data.sub and not data.lrc: seeked_found_line: Optional[int] = None - # Split lyrics into lines based on
or newline characters + # Split lyrics into lines based on
, newline characters, or " / " lyrics_text = result["lyrics"].strip() + + # Determine the delimiter and split accordingly if "
" in lyrics_text: lyric_lines = lyrics_text.split("
") + separator = "
" + elif " / " in lyrics_text: + lyric_lines = lyrics_text.split(" / ") + separator = " / " else: lyric_lines = lyrics_text.split("\n") + separator = "\n" + + search_term = data.sub.strip().lower() + + # First try single-line matching (existing behavior) for i, line in enumerate(lyric_lines): # Remove any special characters and extra spaces cleaned_line = regex.sub(r"\u2064", "", line.strip()) - if data.sub.strip().lower() in cleaned_line.lower(): + if search_term in cleaned_line.lower(): seeked_found_line = i break + + # If no single-line match found, try multi-line matching + if seeked_found_line is None: + # Try matching across consecutive lines (up to 5 lines for reasonable performance) + max_lines_to_check = min(5, len(lyric_lines)) + + for i in range(len(lyric_lines)): + for line_count in range(2, max_lines_to_check + 1): + if i + line_count <= len(lyric_lines): + # Combine consecutive lines with space separator + combined_lines = [] + line_positions: list[tuple[int, int]] = [] # Track where each line starts in combined text + combined_text_parts: list[str] = [] + + for j in range(line_count): + if i + j < len(lyric_lines): + cleaned_line = regex.sub(r"\u2064", "", lyric_lines[i + j].strip()) + combined_lines.append(cleaned_line) + + # Track position of this line in the combined text + line_start_pos = len(" ".join(combined_text_parts).lower()) + if line_start_pos > 0: + line_start_pos += 1 # Account for space separator + line_positions.append((i + j, line_start_pos)) + combined_text_parts.append(cleaned_line) + + combined_text = " ".join(combined_lines).lower() + + if search_term in combined_text: + # Find which specific line the match starts in + match_pos = combined_text.find(search_term) + + # Find the line that contains the start of the match + actual_start_line = i # Default fallback + for line_idx, line_start_pos in line_positions: + if line_start_pos <= match_pos: + actual_start_line = line_idx + else: + break + + seeked_found_line = actual_start_line + break + + if seeked_found_line is not None: + break if seeked_found_line is None: return JSONResponse( - status_code=500, content={ "err": True, "errorText": "Seek (a.k.a. subsearch) failed.", @@ -233,8 +288,6 @@ class LyricSearch(FastAPI): }, ) # Only include lines strictly starting from the matched line - # Use the same separator that was used to split - separator = "
" if "
" in result["lyrics"] else "\n" result["lyrics"] = separator.join(lyric_lines[seeked_found_line:]) result["confidence"] = int(result["confidence"]) diff --git a/lyric_search/sources/cache.py b/lyric_search/sources/cache.py index c21df54..6887416 100644 --- a/lyric_search/sources/cache.py +++ b/lyric_search/sources/cache.py @@ -343,15 +343,19 @@ class Cache: ) else: best_match = (result_tracks[0], 100) - if not best_match or confidence < 90: + if not best_match: return None (candidate, confidence) = best_match + if confidence < 90: + return None logging.info("Result found on %s", self.label) matched = self.get_matched( sqlite_rows=results, matched_candidate=candidate, confidence=confidence, ) + if matched is None: + return None time_end: float = time.time() time_diff: float = time_end - time_start matched.time = time_diff