Files
needLe/dotnet/MaigoLabs.NeedLe.Searcher/InvertedIndexLoader.cs
T
2026-01-01 03:40:41 +08:00

73 lines
2.7 KiB
C#

using MaigoLabs.NeedLe.Common;
using MaigoLabs.NeedLe.Common.Extensions;
using MaigoLabs.NeedLe.Common.Types;
using MaigoLabs.NeedLe.Searcher.Trie;
namespace MaigoLabs.NeedLe.Searcher;
/// <summary>
/// In-memory form of an inverted index, as produced by <see cref="InvertedIndexLoader.Load"/>.
/// </summary>
public class LoadedInvertedIndex
{
    /// <summary>The document strings, taken as-is from the compressed index.</summary>
    public required string[] Documents { get; set; }

    /// <summary>
    /// Code points of each document; entry <c>i</c> is derived from <c>Documents[i]</c>.
    /// </summary>
    public required int[][] DocumentCodePoints { get; set; }

    /// <summary>
    /// One definition per token. The loader assigns each definition's id from its position
    /// in this array.
    /// </summary>
    public required TokenDefinitionExtended[] TokenDefinitions { get; set; }

    /// <summary>Root nodes of the deserialized tries, grouped by kind.</summary>
    public required TypedTries Tries { get; set; }

    /// <summary>
    /// Occurrences of a single token inside a single document.
    /// </summary>
    public class TokenDocumentReference
    {
        /// <summary>Identifier of the document the token occurs in.</summary>
        public required int DocumentId { get; set; }

        /// <summary>Spans decoded from the (start, end) pairs stored for this document.</summary>
        public required OffsetSpan[] Offsets { get; set; }
    }

    /// <summary>
    /// A <see cref="TokenDefinition"/> augmented with the per-document references of the token.
    /// </summary>
    public class TokenDefinitionExtended : TokenDefinition
    {
        /// <summary>Per-document occurrences of this token.</summary>
        public required TokenDocumentReference[] References { get; set; }
    }

    /// <summary>
    /// The three trie roots held by a loaded index.
    /// </summary>
    public class TypedTries
    {
        /// <summary>Root of the trie deserialized from the compressed romaji trie.</summary>
        public required TrieNode Romaji { get; set; }

        /// <summary>Root of the trie deserialized from the compressed kana trie.</summary>
        public required TrieNode Kana { get; set; }

        /// <summary>Root of the trie deserialized from the compressed "other" trie.</summary>
        public required TrieNode Other { get; set; }
    }
}
/// <summary>
/// Expands a <see cref="CompressedInvertedIndex"/> into a <see cref="LoadedInvertedIndex"/>.
/// </summary>
public class InvertedIndexLoader
{
    /// <summary>
    /// Builds the in-memory index: decodes every document into code points, deserializes the
    /// romaji / kana / other tries, and materializes one token definition per entry of
    /// <c>compressed.tokenTypes</c> together with its document references.
    /// </summary>
    /// <param name="compressed">The compressed index to expand.</param>
    /// <returns>The loaded index; <c>Documents</c> is the same array instance as <c>compressed.documents</c>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="compressed"/> is <see langword="null"/>.</exception>
    public static LoadedInvertedIndex Load(CompressedInvertedIndex compressed)
    {
        // Fail fast with a descriptive exception instead of a NullReferenceException further down.
        ArgumentNullException.ThrowIfNull(compressed);

        var documents = compressed.documents;
        var documentCodePoints = documents.Select(document => document.ToCodePoints().ToArray()).ToArray();

        var romajiTrie = TrieDeserializer.Deserialize(compressed.tries.romaji);
        var kanaTrie = TrieDeserializer.Deserialize(compressed.tries.kana);
        var otherTrie = TrieDeserializer.Deserialize(compressed.tries.other);

        // Merge the per-trie token tables into a single lookup keyed by token id.
        // ToDictionary throws if the same key is present in more than one trie;
        // presumably token ids are unique across the three tries (verify against the serializer).
        var tokenCodePoints = romajiTrie.TokenCodePoints
            .Concat(kanaTrie.TokenCodePoints)
            .Concat(otherTrie.TokenCodePoints)
            .ToDictionary(entry => entry.Key, entry => entry.Value);

        var tokenDefinitions = compressed.tokenTypes.Select((type, index) =>
        {
            // A token's id is its position in tokenTypes; look its code points up once.
            var codePoints = tokenCodePoints[index];
            return new LoadedInvertedIndex.TokenDefinitionExtended
            {
                Id = index,
                Type = (TokenType)type,
                Text = codePoints.ToUtf32String(),
                CodePointLength = codePoints.Length,
                References = compressed.tokenReferences[index].Select(data => new LoadedInvertedIndex.TokenDocumentReference
                {
                    // Each reference is laid out as [documentId, start0, end0, start1, end1, ...].
                    DocumentId = data[0],
                    // With one id plus two values per span the length is odd, so the
                    // integer division yields exactly the number of (start, end) pairs.
                    Offsets = Enumerable.Range(0, data.Length / 2)
                        .Select(i => new OffsetSpan { Start = data[i * 2 + 1], End = data[i * 2 + 2] })
                        .ToArray(),
                }).ToArray(),
            };
        }).ToArray();

        return new LoadedInvertedIndex
        {
            Documents = documents,
            DocumentCodePoints = documentCodePoints,
            TokenDefinitions = tokenDefinitions,
            Tries = new LoadedInvertedIndex.TypedTries
            {
                Romaji = romajiTrie.Root,
                Kana = kanaTrie.Root,
                Other = otherTrie.Root,
            },
        };
    }
}