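Enhance lyric subsearch: split lyrics on " / " in addition to <br> and newlines, reuse the separator chosen at split time when rejoining the result, and fall back to matching the search term across up to 5 consecutive lines when no single line matches. In the cache lookup, apply the confidence threshold (< 90) only after the best match has been unpacked, and return None when no matched result is produced.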
This commit is contained in:
@@ -210,22 +210,77 @@ class LyricSearch(FastAPI):
|
|||||||
|
|
||||||
if data.sub and not data.lrc:
|
if data.sub and not data.lrc:
|
||||||
seeked_found_line: Optional[int] = None
|
seeked_found_line: Optional[int] = None
|
||||||
# Split lyrics into lines based on <br> or newline characters
|
# Split lyrics into lines based on <br>, newline characters, or " / "
|
||||||
lyrics_text = result["lyrics"].strip()
|
lyrics_text = result["lyrics"].strip()
|
||||||
|
|
||||||
|
# Determine the delimiter and split accordingly
|
||||||
if "<br>" in lyrics_text:
|
if "<br>" in lyrics_text:
|
||||||
lyric_lines = lyrics_text.split("<br>")
|
lyric_lines = lyrics_text.split("<br>")
|
||||||
|
separator = "<br>"
|
||||||
|
elif " / " in lyrics_text:
|
||||||
|
lyric_lines = lyrics_text.split(" / ")
|
||||||
|
separator = " / "
|
||||||
else:
|
else:
|
||||||
lyric_lines = lyrics_text.split("\n")
|
lyric_lines = lyrics_text.split("\n")
|
||||||
|
separator = "\n"
|
||||||
|
|
||||||
|
search_term = data.sub.strip().lower()
|
||||||
|
|
||||||
|
# First try single-line matching (existing behavior)
|
||||||
for i, line in enumerate(lyric_lines):
|
for i, line in enumerate(lyric_lines):
|
||||||
# Remove any special characters and extra spaces
|
# Remove any special characters and extra spaces
|
||||||
cleaned_line = regex.sub(r"\u2064", "", line.strip())
|
cleaned_line = regex.sub(r"\u2064", "", line.strip())
|
||||||
if data.sub.strip().lower() in cleaned_line.lower():
|
if search_term in cleaned_line.lower():
|
||||||
seeked_found_line = i
|
seeked_found_line = i
|
||||||
break
|
break
|
||||||
|
|
||||||
|
# If no single-line match found, try multi-line matching
|
||||||
|
if seeked_found_line is None:
|
||||||
|
# Try matching across consecutive lines (up to 5 lines for reasonable performance)
|
||||||
|
max_lines_to_check = min(5, len(lyric_lines))
|
||||||
|
|
||||||
|
for i in range(len(lyric_lines)):
|
||||||
|
for line_count in range(2, max_lines_to_check + 1):
|
||||||
|
if i + line_count <= len(lyric_lines):
|
||||||
|
# Combine consecutive lines with space separator
|
||||||
|
combined_lines = []
|
||||||
|
line_positions: list[tuple[int, int]] = [] # Track where each line starts in combined text
|
||||||
|
combined_text_parts: list[str] = []
|
||||||
|
|
||||||
|
for j in range(line_count):
|
||||||
|
if i + j < len(lyric_lines):
|
||||||
|
cleaned_line = regex.sub(r"\u2064", "", lyric_lines[i + j].strip())
|
||||||
|
combined_lines.append(cleaned_line)
|
||||||
|
|
||||||
|
# Track position of this line in the combined text
|
||||||
|
line_start_pos = len(" ".join(combined_text_parts).lower())
|
||||||
|
if line_start_pos > 0:
|
||||||
|
line_start_pos += 1 # Account for space separator
|
||||||
|
line_positions.append((i + j, line_start_pos))
|
||||||
|
combined_text_parts.append(cleaned_line)
|
||||||
|
|
||||||
|
combined_text = " ".join(combined_lines).lower()
|
||||||
|
|
||||||
|
if search_term in combined_text:
|
||||||
|
# Find which specific line the match starts in
|
||||||
|
match_pos = combined_text.find(search_term)
|
||||||
|
|
||||||
|
# Find the line that contains the start of the match
|
||||||
|
actual_start_line = i # Default fallback
|
||||||
|
for line_idx, line_start_pos in line_positions:
|
||||||
|
if line_start_pos <= match_pos:
|
||||||
|
actual_start_line = line_idx
|
||||||
|
else:
|
||||||
|
break
|
||||||
|
|
||||||
|
seeked_found_line = actual_start_line
|
||||||
|
break
|
||||||
|
|
||||||
|
if seeked_found_line is not None:
|
||||||
|
break
|
||||||
|
|
||||||
if seeked_found_line is None:
|
if seeked_found_line is None:
|
||||||
return JSONResponse(
|
return JSONResponse(
|
||||||
status_code=500,
|
|
||||||
content={
|
content={
|
||||||
"err": True,
|
"err": True,
|
||||||
"errorText": "Seek (a.k.a. subsearch) failed.",
|
"errorText": "Seek (a.k.a. subsearch) failed.",
|
||||||
@@ -233,8 +288,6 @@ class LyricSearch(FastAPI):
|
|||||||
},
|
},
|
||||||
)
|
)
|
||||||
# Only include lines strictly starting from the matched line
|
# Only include lines strictly starting from the matched line
|
||||||
# Use the same separator that was used to split
|
|
||||||
separator = "<br>" if "<br>" in result["lyrics"] else "\n"
|
|
||||||
result["lyrics"] = separator.join(lyric_lines[seeked_found_line:])
|
result["lyrics"] = separator.join(lyric_lines[seeked_found_line:])
|
||||||
|
|
||||||
result["confidence"] = int(result["confidence"])
|
result["confidence"] = int(result["confidence"])
|
||||||
|
|||||||
@@ -343,15 +343,19 @@ class Cache:
|
|||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
best_match = (result_tracks[0], 100)
|
best_match = (result_tracks[0], 100)
|
||||||
if not best_match or confidence < 90:
|
if not best_match:
|
||||||
return None
|
return None
|
||||||
(candidate, confidence) = best_match
|
(candidate, confidence) = best_match
|
||||||
|
if confidence < 90:
|
||||||
|
return None
|
||||||
logging.info("Result found on %s", self.label)
|
logging.info("Result found on %s", self.label)
|
||||||
matched = self.get_matched(
|
matched = self.get_matched(
|
||||||
sqlite_rows=results,
|
sqlite_rows=results,
|
||||||
matched_candidate=candidate,
|
matched_candidate=candidate,
|
||||||
confidence=confidence,
|
confidence=confidence,
|
||||||
)
|
)
|
||||||
|
if matched is None:
|
||||||
|
return None
|
||||||
time_end: float = time.time()
|
time_end: float = time.time()
|
||||||
time_diff: float = time_end - time_start
|
time_diff: float = time_end - time_start
|
||||||
matched.time = time_diff
|
matched.time = time_diff
|
||||||
|
|||||||
Reference in New Issue
Block a user