feat: pre-filter and custom comparer for documents
This commit is contained in:
@@ -100,4 +100,38 @@ describe('search options', () => {
|
||||
expect(() => loadInvertedIndex(compressed)).toThrow();
|
||||
});
|
||||
});
|
||||
|
||||
// Checks the `filterDocument` search option: a document whose ID makes the
// callback return a falsy value must not appear in the search results.
describe('filterDocument option', () => {
|
||||
it('should exclude filtered documents from results', () => {
|
||||
const compressed = buildInvertedIndex(testDocuments, { kuromoji });
|
||||
const invertedIndex = loadInvertedIndex(compressed);
|
||||
|
||||
// Baseline: search without a filter - should find "宵の鳥" (documentId 2)
|
||||
const resultsWithoutFilter = searchInvertedIndex(invertedIndex, 'yoi');
|
||||
expect(resultsWithoutFilter.map(r => r.documentText)).toContain('宵の鳥');
|
||||
|
||||
// Same query, but the filter returns false for documentId 2, so it must be dropped
|
||||
const resultsWithFilter = searchInvertedIndex(invertedIndex, 'yoi', {
|
||||
filterDocument: id => id !== 2,
|
||||
});
|
||||
expect(resultsWithFilter.map(r => r.documentText)).not.toContain('宵の鳥');
|
||||
});
|
||||
});
|
||||
|
||||
// Checks the `nextComparer` search option: it is given two document IDs and
// decides the order of results that the built-in comparison treats as equal.
describe('nextComparer option', () => {
|
||||
it('should use custom comparer for final sorting when other criteria are equal', () => {
|
||||
// Create documents that would have similar match scores
|
||||
const similarDocs = ['テストA', 'テストB', 'テストC'];
|
||||
const compressed = buildInvertedIndex(similarDocs, { kuromoji });
|
||||
const invertedIndex = loadInvertedIndex(compressed);
|
||||
|
||||
// Search with reverse order comparer
|
||||
const results = searchInvertedIndex(invertedIndex, 'テスト', {
|
||||
nextComparer: (a, b) => b - a, // Reverse by documentId (a and b are document IDs)
|
||||
});
|
||||
|
||||
// Should be in reverse documentId order (2, 1, 0) when other criteria equal
|
||||
expect(results.map(r => r.documentId)).toEqual([2, 1, 0]);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -133,12 +133,24 @@ const compareFinalResult = getComparerForTraits<SearchResult>({
|
||||
getLastToken: state => state.tokens[state.tokens.length - 1]!,
|
||||
getMatchRatio: state => state.matchRatio,
|
||||
getMatchRatioLevel: state => Math.round(state.matchRatio * 5),
|
||||
nextComparer: (a, b) => a.documentText === b.documentText ? 0 : a.documentText < b.documentText ? -1 : 1,
|
||||
});
|
||||
|
||||
/**
 * Reports whether `documentCodePoints.slice(start, end)` holds at least one
 * entry that the whitespace pattern `/\s/` does not match.
 *
 * @param documentCodePoints Entries to inspect.
 * @param start Start index, interpreted as by `Array.prototype.slice`.
 * @param end End index (exclusive), interpreted as by `Array.prototype.slice`.
 * @returns `false` when `start === end`, when the slice is empty, or when
 *          every entry in the slice matches `/\s/`; `true` otherwise.
 */
const hasNonEmptyCharacters = (documentCodePoints: string[], start: number, end: number): boolean => {
  // Identical bounds describe an empty range; nothing to inspect.
  if (start === end) return false;

  const segment = documentCodePoints.slice(start, end);
  return segment.some(codePoint => !/\s/.test(codePoint));
};
|
||||
|
||||
export const searchInvertedIndex = (invertedIndex: LoadedInvertedIndex, text: string): SearchResult[] => {
|
||||
export const searchInvertedIndex = (
|
||||
invertedIndex: LoadedInvertedIndex,
|
||||
text: string,
|
||||
options?: {
|
||||
/**
|
||||
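* Tie-breaker called with two document IDs when all other comparisons are equal.
* Its return value is used as the sort comparator result (negative, zero, or positive).
* When omitted, ties are ordered by document text.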
|
||||
*/
|
||||
nextComparer?: (documentIdA: number, documentIdB: number) => number;
|
||||
/**
|
||||
* If this returns a falsy value for a document, that document is excluded from the final results.
|
||||
*/
|
||||
filterDocument?: (documentId: number) => unknown;
|
||||
},
|
||||
): SearchResult[] => {
|
||||
const { documents, documentCodePoints, tokenDefinitions, tries } = invertedIndex;
|
||||
|
||||
const codePoints = [...toKatakana(normalizeByCodePoint(text))];
|
||||
@@ -162,6 +174,7 @@ export const searchInvertedIndex = (invertedIndex: LoadedInvertedIndex, text: st
|
||||
...getTrieNodeTokenIds(otherNode, reachingInputEnd),
|
||||
]);
|
||||
for (const tokenId of matchingTokenIds) for (const { documentId, offsets } of tokenDefinitions[tokenId]!.references) {
|
||||
if (options?.filterDocument && !options.filterDocument(documentId)) continue;
|
||||
const isTokenPrefixMatching = !romajiNode?.tokenIds.includes(tokenId) && !kanaNode?.tokenIds.includes(tokenId) && !otherNode?.tokenIds.includes(tokenId);
|
||||
const previousMatchesOfDocument = dp[l - 1]?.get(documentId);
|
||||
if (l !== 0 && !previousMatchesOfDocument) continue;
|
||||
@@ -231,7 +244,13 @@ export const searchInvertedIndex = (invertedIndex: LoadedInvertedIndex, text: st
|
||||
matchRatio,
|
||||
matchRatioLevel,
|
||||
};
|
||||
}).sort(compareFinalResult);
|
||||
}).sort((a, b) => {
|
||||
const compareResult = compareFinalResult(a, b);
|
||||
if (compareResult !== 0) return compareResult;
|
||||
return options?.nextComparer
|
||||
? options.nextComparer(a.documentId, b.documentId)
|
||||
: a.documentText === b.documentText ? 0 : a.documentText < b.documentText ? -1 : 1;
|
||||
});
|
||||
};
|
||||
|
||||
// For debugging
|
||||
|
||||
Reference in New Issue
Block a user