Files
needLe/dotnet/MaigoLabs.NeedLe.Tests/E2E/TrieSerializationTests.cs
T
2026-01-01 03:40:41 +08:00

144 lines
4.6 KiB
C#

using MaigoLabs.NeedLe.Common;
using MaigoLabs.NeedLe.Common.Extensions;
using MaigoLabs.NeedLe.Indexer.Trie;
using MaigoLabs.NeedLe.Searcher.Trie;
namespace MaigoLabs.NeedLe.Tests.E2E;
#region Trie Building
public sealed class TrieBuilding_BuildsTrieWithMultipleDifferentTokensTest : NeedleTestBase
{
[Fact]
public void Execute()
{
var trie = TrieBuilder.BuildTrie([
(0, "hello".ToCodePoints()),
(1, "help".ToCodePoints()),
(2, "world".ToCodePoints()),
(3, "word".ToCodePoints()),
]);
// Traverse to verify structure
var helloNode = trie.Traverse("hello".ToCodePoints().ToArray());
var helpNode = trie.Traverse("help".ToCodePoints().ToArray());
var worldNode = trie.Traverse("world".ToCodePoints().ToArray());
var wordNode = trie.Traverse("word".ToCodePoints().ToArray());
Assert.NotNull(helloNode);
Assert.NotNull(helpNode);
Assert.NotNull(worldNode);
Assert.NotNull(wordNode);
// Check token IDs
Assert.Contains(0, helloNode!.TokenIds);
Assert.Contains(1, helpNode!.TokenIds);
Assert.Contains(2, worldNode!.TokenIds);
Assert.Contains(3, wordNode!.TokenIds);
// Check that 'hel' prefix node has both tokens in subTree
var helNode = trie.Traverse("hel".ToCodePoints().ToArray());
Assert.NotNull(helNode);
Assert.Contains(0, helNode!.SubTreeTokenIds);
Assert.Contains(1, helNode.SubTreeTokenIds);
}
}
public sealed class TrieBuilding_HandlesJapaneseTextTokensTest : NeedleTestBase
{
[Fact]
public void Execute()
{
var trie = TrieBuilder.BuildTrie([
(0, "さくら".ToCodePoints()),
(1, "サクラ".ToCodePoints()),
(2, "桜".ToCodePoints()),
]);
Assert.Contains(0, trie.Traverse("さくら".ToCodePoints().ToArray())?.TokenIds ?? []);
Assert.Contains(1, trie.Traverse("サクラ".ToCodePoints().ToArray())?.TokenIds ?? []);
Assert.Contains(2, trie.Traverse("桜".ToCodePoints().ToArray())?.TokenIds ?? []);
}
}
#endregion
#region Trie Serialization
public sealed class TrieSerialization_SerializesAndDeserializesCorrectlyTest : NeedleTestBase
{
[Fact]
public void Execute()
{
var originalTrie = TrieBuilder.BuildTrie([
(0, "apple".ToCodePoints()),
(1, "app".ToCodePoints()),
(2, "banana".ToCodePoints()),
]);
// Serialize
var serialized = TrieSerializer.Serialize(originalTrie);
Assert.True(serialized.Length > 0);
// Deserialize
var deserialized = TrieDeserializer.Deserialize(serialized);
var deserializedTrie = deserialized.Root;
var tokenCodePoints = deserialized.TokenCodePoints;
// Verify structure is preserved
var appleNode = deserializedTrie.Traverse("apple".ToCodePoints().ToArray());
var appNode = deserializedTrie.Traverse("app".ToCodePoints().ToArray());
var bananaNode = deserializedTrie.Traverse("banana".ToCodePoints().ToArray());
Assert.NotNull(appleNode);
Assert.NotNull(appNode);
Assert.NotNull(bananaNode);
Assert.Contains(0, appleNode!.TokenIds);
Assert.Contains(1, appNode!.TokenIds);
Assert.Contains(2, bananaNode!.TokenIds);
// Verify tokenCodePoints map
Assert.Equal("apple", tokenCodePoints[0].ToUtf32String());
Assert.Equal("app", tokenCodePoints[1].ToUtf32String());
Assert.Equal("banana", tokenCodePoints[2].ToUtf32String());
// Verify subTreeTokenIds are reconstructed
Assert.Contains(0, appNode.SubTreeTokenIds);
Assert.Contains(1, appNode.SubTreeTokenIds);
}
}
public sealed class TrieSerialization_PreservesParentReferencesAfterDeserializationTest : NeedleTestBase
{
[Fact]
public void Execute()
{
var originalTrie = TrieBuilder.BuildTrie([
(0, "test".ToCodePoints()),
]);
var serialized = TrieSerializer.Serialize(originalTrie);
var deserialized = TrieDeserializer.Deserialize(serialized);
var root = deserialized.Root;
var testNode = root.Traverse("test".ToCodePoints().ToArray());
Assert.NotNull(testNode);
// Walk back to root via parent references
TrieNode? node = testNode;
var depth = 0;
while (node?.Parent != null)
{
node = node.Parent;
depth++;
}
Assert.Equal(4, depth); // 't' -> 'e' -> 's' -> 't' -> root
Assert.Same(root, node);
}
}
#endregion