diff --git a/dotnet/MaigoLabs.NeedLe.Searcher/InvertedIndexSearcher.cs b/dotnet/MaigoLabs.NeedLe.Searcher/InvertedIndexSearcher.cs index e7c8956..9f75b29 100644 --- a/dotnet/MaigoLabs.NeedLe.Searcher/InvertedIndexSearcher.cs +++ b/dotnet/MaigoLabs.NeedLe.Searcher/InvertedIndexSearcher.cs @@ -167,6 +167,8 @@ public static class InvertedIndexSearcher public static SearchResult[] Search(LoadedInvertedIndex invertedIndex, string text, InvertedIndexSearcherOptions? options = null) { + if (string.IsNullOrWhiteSpace(text)) return []; + var documents = invertedIndex.Documents; var documentCodePoints = invertedIndex.DocumentCodePoints; var tokenDefinitions = invertedIndex.TokenDefinitions; @@ -184,9 +186,13 @@ public static class InvertedIndexSearcher for (var r = l; r < codePoints.Length && (romajiNode != null || kanaNode != null || otherNode != null); r++) // [l, r] { var codePoint = codePoints[r]; - romajiNode = romajiNode.TraverseStep(codePoint, IsIgnorableCodePoint(codePoint)); - kanaNode = kanaNode.TraverseStep(codePoint, IsIgnorableCodePoint(codePoint)); - otherNode = otherNode.TraverseStep(codePoint, IsIgnorableCodePoint(codePoint)); + var nextRomajiNode = romajiNode.TraverseStep(codePoint, IsIgnorableCodePoint(codePoint)); + var nextKanaNode = kanaNode.TraverseStep(codePoint, IsIgnorableCodePoint(codePoint)); + var nextOtherNode = otherNode.TraverseStep(codePoint, IsIgnorableCodePoint(codePoint)); + if (nextRomajiNode == romajiNode && nextKanaNode == kanaNode && nextOtherNode == otherNode) continue; // This code point is fully ignored on current state + romajiNode = nextRomajiNode; + kanaNode = nextKanaNode; + otherNode = nextOtherNode; var reachingInputEnd = r == codePoints.Length - 1; HashSet matchingTokenIds = [ diff --git a/packages/needle/src/searcher/search.ts b/packages/needle/src/searcher/search.ts index 575fec7..1c24397 100644 --- a/packages/needle/src/searcher/search.ts +++ b/packages/needle/src/searcher/search.ts @@ -151,6 +151,8 @@ export const searchInvertedIndex = ( filterDocument?: (documentId: number) => unknown; }, ): SearchResult[] => { + if (!text.trim()) return []; + const { documents, documentCodePoints, tokenDefinitions, tries } = invertedIndex; const codePoints = [...toKatakana(normalizeByCodePoint(text))]; @@ -163,9 +165,13 @@ export const searchInvertedIndex = ( let otherNode: TrieNode | undefined = tries.other; for (let r = l; r < codePoints.length && (romajiNode || kanaNode || otherNode); r++) { // [l, r] const codePoint = codePoints[r]!; - romajiNode = traverseTrieStep(romajiNode, codePoint, IGNORABLE_CODE_POINTS); - kanaNode = traverseTrieStep(kanaNode, codePoint, IGNORABLE_CODE_POINTS); - otherNode = traverseTrieStep(otherNode, codePoint, IGNORABLE_CODE_POINTS); + const nextRomajiNode = traverseTrieStep(romajiNode, codePoint, IGNORABLE_CODE_POINTS); + const nextKanaNode = traverseTrieStep(kanaNode, codePoint, IGNORABLE_CODE_POINTS); + const nextOtherNode = traverseTrieStep(otherNode, codePoint, IGNORABLE_CODE_POINTS); + if (nextRomajiNode === romajiNode && nextKanaNode === kanaNode && nextOtherNode === otherNode) continue; // This code point is fully ignored on current state + romajiNode = nextRomajiNode; + kanaNode = nextKanaNode; + otherNode = nextOtherNode; const reachingInputEnd = r === codePoints.length - 1; const matchingTokenIds = new Set([ // Allow suffix matching of romaji/other tokens if we're at the end of the input