From c6d2bad79db7676a2c96dba8d9a0db96d99422fb Mon Sep 17 00:00:00 2001
From: codey <codey@code.ee>
Date: Fri, 24 Oct 2025 13:40:55 -0400
Subject: [PATCH] Enhance lyric search functionality by improving line
 splitting logic and adding multi-line matching for subsearch. Update cache
 handling to ensure confidence threshold is respected before returning
 results.

---
 endpoints/lyric_search.py     | 63 ++++++++++++++++++++++++++++++++---
 lyric_search/sources/cache.py |  6 +++-
 2 files changed, 63 insertions(+), 6 deletions(-)
diff --git a/endpoints/lyric_search.py b/endpoints/lyric_search.py
index e43ef00..062826a 100644
--- a/endpoints/lyric_search.py
+++ b/endpoints/lyric_search.py
@@ -210,22 +210,77 @@ class LyricSearch(FastAPI):
 
         if data.sub and not data.lrc:
             seeked_found_line: Optional[int] = None
-            # Split lyrics into lines based on <br> or newline characters
+            # Split lyrics into lines on the first delimiter present: <br>, else " / ", else newline
             lyrics_text = result["lyrics"].strip()
+            
+            # Determine the delimiter and split accordingly
             if "<br>" in lyrics_text:
                 lyric_lines = lyrics_text.split("<br>")
+                separator = "<br>"
+            elif " / " in lyrics_text:
+                lyric_lines = lyrics_text.split(" / ")
+                separator = " / "
             else:
                 lyric_lines = lyrics_text.split("\n")
+                separator = "\n"
+            
+            search_term = data.sub.strip().lower()
+            
+            # First try single-line matching (existing behavior)
             for i, line in enumerate(lyric_lines):
                 # Remove any special characters and extra spaces
                 cleaned_line = regex.sub(r"\u2064", "", line.strip())
-                if data.sub.strip().lower() in cleaned_line.lower():
+                if search_term in cleaned_line.lower():
                     seeked_found_line = i
                     break
+            
+            # If no single-line match found, try multi-line matching
+            if seeked_found_line is None:
+                # Try matching across windows of 2 to 5 consecutive lines (capped for reasonable performance)
+                max_lines_to_check = min(5, len(lyric_lines))
+                
+                for i in range(len(lyric_lines)):
+                    for line_count in range(2, max_lines_to_check + 1):
+                        if i + line_count <= len(lyric_lines):
+                            # Combine consecutive lines with space separator
+                            combined_lines = []
+                            line_positions: list[tuple[int, int]] = []  # Track where each line starts in combined text
+                            combined_text_parts: list[str] = []
+                            
+                            for j in range(line_count):
+                                if i + j < len(lyric_lines):
+                                    cleaned_line = regex.sub(r"\u2064", "", lyric_lines[i + j].strip())
+                                    combined_lines.append(cleaned_line)
+                                    
+                                    # Track position of this line in the combined text
+                                    line_start_pos = len(" ".join(combined_text_parts).lower())
+                                    if line_start_pos > 0:
+                                        line_start_pos += 1  # Account for space separator
+                                    line_positions.append((i + j, line_start_pos))
+                                    combined_text_parts.append(cleaned_line)
+                            
+                            combined_text = " ".join(combined_lines).lower()
+                            
+                            if search_term in combined_text:
+                                # Find which specific line the match starts in
+                                match_pos = combined_text.find(search_term)
+                                
+                                # Find the line that contains the start of the match
+                                actual_start_line = i  # Default fallback
+                                for line_idx, line_start_pos in line_positions:
+                                    if line_start_pos <= match_pos:
+                                        actual_start_line = line_idx
+                                    else:
+                                        break
+                                
+                                seeked_found_line = actual_start_line
+                                break
+                    
+                    if seeked_found_line is not None:
+                        break
 
             if seeked_found_line is None:
                 return JSONResponse(
-                    status_code=500,
                     content={
                         "err": True,
                         "errorText": "Seek (a.k.a. subsearch) failed.",
@@ -233,8 +288,6 @@ class LyricSearch(FastAPI):
                     },
                 )
             # Only include lines strictly starting from the matched line
-            # Use the same separator that was used to split
-            separator = "<br>" if "<br>" in result["lyrics"] else "\n"
             result["lyrics"] = separator.join(lyric_lines[seeked_found_line:])
 
         result["confidence"] = int(result["confidence"])
diff --git a/lyric_search/sources/cache.py b/lyric_search/sources/cache.py
index c21df54..6887416 100644
--- a/lyric_search/sources/cache.py
+++ b/lyric_search/sources/cache.py
@@ -343,15 +343,19 @@ class Cache:
                             )
                         else:
                             best_match = (result_tracks[0], 100)
-                        if not best_match or confidence < 90:
+                        if not best_match:
                             return None
                         (candidate, confidence) = best_match
+                        if confidence < 90:
+                            return None
                         logging.info("Result found on %s", self.label)
                         matched = self.get_matched(
                             sqlite_rows=results,
                             matched_candidate=candidate,
                             confidence=confidence,
                         )
+                        if matched is None:
+                            return None
                         time_end: float = time.time()
                         time_diff: float = time_end - time_start
                         matched.time = time_diff