feat: initial commit

Menci
2026-01-01 03:40:41 +08:00
commit 631f8ed771
98 changed files with 14776 additions and 0 deletions
+25
@@ -0,0 +1,25 @@
root = true
[*.cs]
charset = utf-8
indent_style = space
indent_size = 4
end_of_line = lf
insert_final_newline = true
trim_trailing_whitespace = true
[*.{csproj,props,slnx}]
charset = utf-8
indent_style = space
indent_size = 2
end_of_line = lf
insert_final_newline = true
trim_trailing_whitespace = true
[*.{ts,tsx,js,json}]
charset = utf-8
indent_style = space
indent_size = 2
end_of_line = lf
insert_final_newline = true
trim_trailing_whitespace = true
+41
@@ -0,0 +1,41 @@
name: CI
on:
push:
pull_request:
jobs:
ci:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Setup pnpm
uses: pnpm/action-setup@v4
- name: Setup Node.js
uses: actions/setup-node@v4
with:
node-version: 24
cache: pnpm
- name: Setup .NET
uses: actions/setup-dotnet@v4
with:
dotnet-version: 10.0.x
- name: Install dependencies
run: pnpm install --frozen-lockfile
- name: Typecheck
run: pnpm typecheck
- name: Lint
run: pnpm lint
- name: Test (TypeScript)
run: pnpm test
- name: Test (.NET)
run: pnpm test:dotnet
+141
@@ -0,0 +1,141 @@
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
lerna-debug.log*
# Diagnostic reports (https://nodejs.org/api/report.html)
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
# Runtime data
pids
*.pid
*.seed
*.pid.lock
# Directory for instrumented libs generated by jscoverage/JSCover
lib-cov
# Coverage directory used by tools like istanbul
coverage
*.lcov
# nyc test coverage
.nyc_output
# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
.grunt
# Bower dependency directory (https://bower.io/)
bower_components
# node-waf configuration
.lock-wscript
# Compiled binary addons (https://nodejs.org/api/addons.html)
build/Release
# Dependency directories
node_modules/
jspm_packages/
# Snowpack dependency directory (https://snowpack.dev/)
web_modules/
# TypeScript cache
*.tsbuildinfo
# Optional npm cache directory
.npm
# Optional eslint cache
.eslintcache
# Optional stylelint cache
.stylelintcache
# Optional REPL history
.node_repl_history
# Output of 'npm pack'
*.tgz
# Yarn Integrity file
.yarn-integrity
# dotenv environment variable files
.env
.env.*
!.env.example
# parcel-bundler cache (https://parceljs.org/)
.cache
.parcel-cache
# Next.js build output
.next
out
# Nuxt.js build / generate output
.nuxt
dist
.output
# Gatsby files
.cache/
# Uncomment the public line if your project uses Gatsby and not Next.js
# https://nextjs.org/blog/next-9-1#public-directory-support
# public
# vuepress build output
.vuepress/dist
# vuepress v2.x temp and cache directory
.temp
.cache
# Sveltekit cache directory
.svelte-kit/
# vitepress build output
**/.vitepress/dist
# vitepress cache directory
**/.vitepress/cache
# Docusaurus cache and generated files
.docusaurus
# Serverless directories
.serverless/
# FuseBox cache
.fusebox/
# DynamoDB Local files
.dynamodb/
# Firebase cache directory
.firebase/
# TernJS port file
.tern-port
# Stores VSCode versions used for testing VSCode extensions
.vscode-test
# yarn v3
.pnp.*
.yarn/*
!.yarn/patches
!.yarn/plugins
!.yarn/releases
!.yarn/sdks
!.yarn/versions
# Vite files
vite.config.js.timestamp-*
vite.config.ts.timestamp-*
.vite/
+18
@@ -0,0 +1,18 @@
{
"editor.tabSize": 4,
"dotnet.defaultSolution": "dotnet/MaigoLabs.NeedLe.slnx",
"files.associations": {
"*.slnx": "xml"
},
"eslint.useFlatConfig": true,
"editor.codeActionsOnSave": {
"source.fixAll.eslint": "explicit"
},
"eslint.rules.customizations": [
{
"rule": "*",
"severity": "warn"
}
],
"eslint.validate": ["javascript", "javascriptreact", "typescript", "typescriptreact", "vue"]
}
+661
@@ -0,0 +1,661 @@
GNU AFFERO GENERAL PUBLIC LICENSE
Version 3, 19 November 2007
Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The GNU Affero General Public License is a free, copyleft license for
software and other kinds of works, specifically designed to ensure
cooperation with the community in the case of network server software.
The licenses for most software and other practical works are designed
to take away your freedom to share and change the works. By contrast,
our General Public Licenses are intended to guarantee your freedom to
share and change all versions of a program--to make sure it remains free
software for all its users.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
them if you wish), that you receive source code or can get it if you
want it, that you can change the software or use pieces of it in new
free programs, and that you know you can do these things.
Developers that use our General Public Licenses protect your rights
with two steps: (1) assert copyright on the software, and (2) offer
you this License which gives you legal permission to copy, distribute
and/or modify the software.
A secondary benefit of defending all users' freedom is that
improvements made in alternate versions of the program, if they
receive widespread use, become available for other developers to
incorporate. Many developers of free software are heartened and
encouraged by the resulting cooperation. However, in the case of
software used on network servers, this result may fail to come about.
The GNU General Public License permits making a modified version and
letting the public access it on a server without ever releasing its
source code to the public.
The GNU Affero General Public License is designed specifically to
ensure that, in such cases, the modified source code becomes available
to the community. It requires the operator of a network server to
provide the source code of the modified version running there to the
users of that server. Therefore, public use of a modified version, on
a publicly accessible server, gives the public access to the source
code of the modified version.
An older license, called the Affero General Public License and
published by Affero, was designed to accomplish similar goals. This is
a different license, not a version of the Affero GPL, but Affero has
released a new version of the Affero GPL which permits relicensing under
this license.
The precise terms and conditions for copying, distribution and
modification follow.
TERMS AND CONDITIONS
0. Definitions.
"This License" refers to version 3 of the GNU Affero General Public License.
"Copyright" also means copyright-like laws that apply to other kinds of
works, such as semiconductor masks.
"The Program" refers to any copyrightable work licensed under this
License. Each licensee is addressed as "you". "Licensees" and
"recipients" may be individuals or organizations.
To "modify" a work means to copy from or adapt all or part of the work
in a fashion requiring copyright permission, other than the making of an
exact copy. The resulting work is called a "modified version" of the
earlier work or a work "based on" the earlier work.
A "covered work" means either the unmodified Program or a work based
on the Program.
To "propagate" a work means to do anything with it that, without
permission, would make you directly or secondarily liable for
infringement under applicable copyright law, except executing it on a
computer or modifying a private copy. Propagation includes copying,
distribution (with or without modification), making available to the
public, and in some countries other activities as well.
To "convey" a work means any kind of propagation that enables other
parties to make or receive copies. Mere interaction with a user through
a computer network, with no transfer of a copy, is not conveying.
An interactive user interface displays "Appropriate Legal Notices"
to the extent that it includes a convenient and prominently visible
feature that (1) displays an appropriate copyright notice, and (2)
tells the user that there is no warranty for the work (except to the
extent that warranties are provided), that licensees may convey the
work under this License, and how to view a copy of this License. If
the interface presents a list of user commands or options, such as a
menu, a prominent item in the list meets this criterion.
1. Source Code.
The "source code" for a work means the preferred form of the work
for making modifications to it. "Object code" means any non-source
form of a work.
A "Standard Interface" means an interface that either is an official
standard defined by a recognized standards body, or, in the case of
interfaces specified for a particular programming language, one that
is widely used among developers working in that language.
The "System Libraries" of an executable work include anything, other
than the work as a whole, that (a) is included in the normal form of
packaging a Major Component, but which is not part of that Major
Component, and (b) serves only to enable use of the work with that
Major Component, or to implement a Standard Interface for which an
implementation is available to the public in source code form. A
"Major Component", in this context, means a major essential component
(kernel, window system, and so on) of the specific operating system
(if any) on which the executable work runs, or a compiler used to
produce the work, or an object code interpreter used to run it.
The "Corresponding Source" for a work in object code form means all
the source code needed to generate, install, and (for an executable
work) run the object code and to modify the work, including scripts to
control those activities. However, it does not include the work's
System Libraries, or general-purpose tools or generally available free
programs which are used unmodified in performing those activities but
which are not part of the work. For example, Corresponding Source
includes interface definition files associated with source files for
the work, and the source code for shared libraries and dynamically
linked subprograms that the work is specifically designed to require,
such as by intimate data communication or control flow between those
subprograms and other parts of the work.
The Corresponding Source need not include anything that users
can regenerate automatically from other parts of the Corresponding
Source.
The Corresponding Source for a work in source code form is that
same work.
2. Basic Permissions.
All rights granted under this License are granted for the term of
copyright on the Program, and are irrevocable provided the stated
conditions are met. This License explicitly affirms your unlimited
permission to run the unmodified Program. The output from running a
covered work is covered by this License only if the output, given its
content, constitutes a covered work. This License acknowledges your
rights of fair use or other equivalent, as provided by copyright law.
You may make, run and propagate covered works that you do not
convey, without conditions so long as your license otherwise remains
in force. You may convey covered works to others for the sole purpose
of having them make modifications exclusively for you, or provide you
with facilities for running those works, provided that you comply with
the terms of this License in conveying all material for which you do
not control copyright. Those thus making or running the covered works
for you must do so exclusively on your behalf, under your direction
and control, on terms that prohibit them from making any copies of
your copyrighted material outside their relationship with you.
Conveying under any other circumstances is permitted solely under
the conditions stated below. Sublicensing is not allowed; section 10
makes it unnecessary.
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
No covered work shall be deemed part of an effective technological
measure under any applicable law fulfilling obligations under article
11 of the WIPO copyright treaty adopted on 20 December 1996, or
similar laws prohibiting or restricting circumvention of such
measures.
When you convey a covered work, you waive any legal power to forbid
circumvention of technological measures to the extent such circumvention
is effected by exercising rights under this License with respect to
the covered work, and you disclaim any intention to limit operation or
modification of the work as a means of enforcing, against the work's
users, your or third parties' legal rights to forbid circumvention of
technological measures.
4. Conveying Verbatim Copies.
You may convey verbatim copies of the Program's source code as you
receive it, in any medium, provided that you conspicuously and
appropriately publish on each copy an appropriate copyright notice;
keep intact all notices stating that this License and any
non-permissive terms added in accord with section 7 apply to the code;
keep intact all notices of the absence of any warranty; and give all
recipients a copy of this License along with the Program.
You may charge any price or no price for each copy that you convey,
and you may offer support or warranty protection for a fee.
5. Conveying Modified Source Versions.
You may convey a work based on the Program, or the modifications to
produce it from the Program, in the form of source code under the
terms of section 4, provided that you also meet all of these conditions:
a) The work must carry prominent notices stating that you modified
it, and giving a relevant date.
b) The work must carry prominent notices stating that it is
released under this License and any conditions added under section
7. This requirement modifies the requirement in section 4 to
"keep intact all notices".
c) You must license the entire work, as a whole, under this
License to anyone who comes into possession of a copy. This
License will therefore apply, along with any applicable section 7
additional terms, to the whole of the work, and all its parts,
regardless of how they are packaged. This License gives no
permission to license the work in any other way, but it does not
invalidate such permission if you have separately received it.
d) If the work has interactive user interfaces, each must display
Appropriate Legal Notices; however, if the Program has interactive
interfaces that do not display Appropriate Legal Notices, your
work need not make them do so.
A compilation of a covered work with other separate and independent
works, which are not by their nature extensions of the covered work,
and which are not combined with it such as to form a larger program,
in or on a volume of a storage or distribution medium, is called an
"aggregate" if the compilation and its resulting copyright are not
used to limit the access or legal rights of the compilation's users
beyond what the individual works permit. Inclusion of a covered work
in an aggregate does not cause this License to apply to the other
parts of the aggregate.
6. Conveying Non-Source Forms.
You may convey a covered work in object code form under the terms
of sections 4 and 5, provided that you also convey the
machine-readable Corresponding Source under the terms of this License,
in one of these ways:
a) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by the
Corresponding Source fixed on a durable physical medium
customarily used for software interchange.
b) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by a
written offer, valid for at least three years and valid for as
long as you offer spare parts or customer support for that product
model, to give anyone who possesses the object code either (1) a
copy of the Corresponding Source for all the software in the
product that is covered by this License, on a durable physical
medium customarily used for software interchange, for a price no
more than your reasonable cost of physically performing this
conveying of source, or (2) access to copy the
Corresponding Source from a network server at no charge.
c) Convey individual copies of the object code with a copy of the
written offer to provide the Corresponding Source. This
alternative is allowed only occasionally and noncommercially, and
only if you received the object code with such an offer, in accord
with subsection 6b.
d) Convey the object code by offering access from a designated
place (gratis or for a charge), and offer equivalent access to the
Corresponding Source in the same way through the same place at no
further charge. You need not require recipients to copy the
Corresponding Source along with the object code. If the place to
copy the object code is a network server, the Corresponding Source
may be on a different server (operated by you or a third party)
that supports equivalent copying facilities, provided you maintain
clear directions next to the object code saying where to find the
Corresponding Source. Regardless of what server hosts the
Corresponding Source, you remain obligated to ensure that it is
available for as long as needed to satisfy these requirements.
e) Convey the object code using peer-to-peer transmission, provided
you inform other peers where the object code and Corresponding
Source of the work are being offered to the general public at no
charge under subsection 6d.
A separable portion of the object code, whose source code is excluded
from the Corresponding Source as a System Library, need not be
included in conveying the object code work.
A "User Product" is either (1) a "consumer product", which means any
tangible personal property which is normally used for personal, family,
or household purposes, or (2) anything designed or sold for incorporation
into a dwelling. In determining whether a product is a consumer product,
doubtful cases shall be resolved in favor of coverage. For a particular
product received by a particular user, "normally used" refers to a
typical or common use of that class of product, regardless of the status
of the particular user or of the way in which the particular user
actually uses, or expects or is expected to use, the product. A product
is a consumer product regardless of whether the product has substantial
commercial, industrial or non-consumer uses, unless such uses represent
the only significant mode of use of the product.
"Installation Information" for a User Product means any methods,
procedures, authorization keys, or other information required to install
and execute modified versions of a covered work in that User Product from
a modified version of its Corresponding Source. The information must
suffice to ensure that the continued functioning of the modified object
code is in no case prevented or interfered with solely because
modification has been made.
If you convey an object code work under this section in, or with, or
specifically for use in, a User Product, and the conveying occurs as
part of a transaction in which the right of possession and use of the
User Product is transferred to the recipient in perpetuity or for a
fixed term (regardless of how the transaction is characterized), the
Corresponding Source conveyed under this section must be accompanied
by the Installation Information. But this requirement does not apply
if neither you nor any third party retains the ability to install
modified object code on the User Product (for example, the work has
been installed in ROM).
The requirement to provide Installation Information does not include a
requirement to continue to provide support service, warranty, or updates
for a work that has been modified or installed by the recipient, or for
the User Product in which it has been modified or installed. Access to a
network may be denied when the modification itself materially and
adversely affects the operation of the network or violates the rules and
protocols for communication across the network.
Corresponding Source conveyed, and Installation Information provided,
in accord with this section must be in a format that is publicly
documented (and with an implementation available to the public in
source code form), and must require no special password or key for
unpacking, reading or copying.
7. Additional Terms.
"Additional permissions" are terms that supplement the terms of this
License by making exceptions from one or more of its conditions.
Additional permissions that are applicable to the entire Program shall
be treated as though they were included in this License, to the extent
that they are valid under applicable law. If additional permissions
apply only to part of the Program, that part may be used separately
under those permissions, but the entire Program remains governed by
this License without regard to the additional permissions.
When you convey a copy of a covered work, you may at your option
remove any additional permissions from that copy, or from any part of
it. (Additional permissions may be written to require their own
removal in certain cases when you modify the work.) You may place
additional permissions on material, added by you to a covered work,
for which you have or can give appropriate copyright permission.
Notwithstanding any other provision of this License, for material you
add to a covered work, you may (if authorized by the copyright holders of
that material) supplement the terms of this License with terms:
a) Disclaiming warranty or limiting liability differently from the
terms of sections 15 and 16 of this License; or
b) Requiring preservation of specified reasonable legal notices or
author attributions in that material or in the Appropriate Legal
Notices displayed by works containing it; or
c) Prohibiting misrepresentation of the origin of that material, or
requiring that modified versions of such material be marked in
reasonable ways as different from the original version; or
d) Limiting the use for publicity purposes of names of licensors or
authors of the material; or
e) Declining to grant rights under trademark law for use of some
trade names, trademarks, or service marks; or
f) Requiring indemnification of licensors and authors of that
material by anyone who conveys the material (or modified versions of
it) with contractual assumptions of liability to the recipient, for
any liability that these contractual assumptions directly impose on
those licensors and authors.
All other non-permissive additional terms are considered "further
restrictions" within the meaning of section 10. If the Program as you
received it, or any part of it, contains a notice stating that it is
governed by this License along with a term that is a further
restriction, you may remove that term. If a license document contains
a further restriction but permits relicensing or conveying under this
License, you may add to a covered work material governed by the terms
of that license document, provided that the further restriction does
not survive such relicensing or conveying.
If you add terms to a covered work in accord with this section, you
must place, in the relevant source files, a statement of the
additional terms that apply to those files, or a notice indicating
where to find the applicable terms.
Additional terms, permissive or non-permissive, may be stated in the
form of a separately written license, or stated as exceptions;
the above requirements apply either way.
8. Termination.
You may not propagate or modify a covered work except as expressly
provided under this License. Any attempt otherwise to propagate or
modify it is void, and will automatically terminate your rights under
this License (including any patent licenses granted under the third
paragraph of section 11).
However, if you cease all violation of this License, then your
license from a particular copyright holder is reinstated (a)
provisionally, unless and until the copyright holder explicitly and
finally terminates your license, and (b) permanently, if the copyright
holder fails to notify you of the violation by some reasonable means
prior to 60 days after the cessation.
Moreover, your license from a particular copyright holder is
reinstated permanently if the copyright holder notifies you of the
violation by some reasonable means, this is the first time you have
received notice of violation of this License (for any work) from that
copyright holder, and you cure the violation prior to 30 days after
your receipt of the notice.
Termination of your rights under this section does not terminate the
licenses of parties who have received copies or rights from you under
this License. If your rights have been terminated and not permanently
reinstated, you do not qualify to receive new licenses for the same
material under section 10.
9. Acceptance Not Required for Having Copies.
You are not required to accept this License in order to receive or
run a copy of the Program. Ancillary propagation of a covered work
occurring solely as a consequence of using peer-to-peer transmission
to receive a copy likewise does not require acceptance. However,
nothing other than this License grants you permission to propagate or
modify any covered work. These actions infringe copyright if you do
not accept this License. Therefore, by modifying or propagating a
covered work, you indicate your acceptance of this License to do so.
10. Automatic Licensing of Downstream Recipients.
Each time you convey a covered work, the recipient automatically
receives a license from the original licensors, to run, modify and
propagate that work, subject to this License. You are not responsible
for enforcing compliance by third parties with this License.
An "entity transaction" is a transaction transferring control of an
organization, or substantially all assets of one, or subdividing an
organization, or merging organizations. If propagation of a covered
work results from an entity transaction, each party to that
transaction who receives a copy of the work also receives whatever
licenses to the work the party's predecessor in interest had or could
give under the previous paragraph, plus a right to possession of the
Corresponding Source of the work from the predecessor in interest, if
the predecessor has it or can get it with reasonable efforts.
You may not impose any further restrictions on the exercise of the
rights granted or affirmed under this License. For example, you may
not impose a license fee, royalty, or other charge for exercise of
rights granted under this License, and you may not initiate litigation
(including a cross-claim or counterclaim in a lawsuit) alleging that
any patent claim is infringed by making, using, selling, offering for
sale, or importing the Program or any portion of it.
11. Patents.
A "contributor" is a copyright holder who authorizes use under this
License of the Program or a work on which the Program is based. The
work thus licensed is called the contributor's "contributor version".
A contributor's "essential patent claims" are all patent claims
owned or controlled by the contributor, whether already acquired or
hereafter acquired, that would be infringed by some manner, permitted
by this License, of making, using, or selling its contributor version,
but do not include claims that would be infringed only as a
consequence of further modification of the contributor version. For
purposes of this definition, "control" includes the right to grant
patent sublicenses in a manner consistent with the requirements of
this License.
Each contributor grants you a non-exclusive, worldwide, royalty-free
patent license under the contributor's essential patent claims, to
make, use, sell, offer for sale, import and otherwise run, modify and
propagate the contents of its contributor version.
In the following three paragraphs, a "patent license" is any express
agreement or commitment, however denominated, not to enforce a patent
(such as an express permission to practice a patent or covenant not to
sue for patent infringement). To "grant" such a patent license to a
party means to make such an agreement or commitment not to enforce a
patent against the party.
If you convey a covered work, knowingly relying on a patent license,
and the Corresponding Source of the work is not available for anyone
to copy, free of charge and under the terms of this License, through a
publicly available network server or other readily accessible means,
then you must either (1) cause the Corresponding Source to be so
available, or (2) arrange to deprive yourself of the benefit of the
patent license for this particular work, or (3) arrange, in a manner
consistent with the requirements of this License, to extend the patent
license to downstream recipients. "Knowingly relying" means you have
actual knowledge that, but for the patent license, your conveying the
covered work in a country, or your recipient's use of the covered work
in a country, would infringe one or more identifiable patents in that
country that you have reason to believe are valid.
If, pursuant to or in connection with a single transaction or
arrangement, you convey, or propagate by procuring conveyance of, a
covered work, and grant a patent license to some of the parties
receiving the covered work authorizing them to use, propagate, modify
or convey a specific copy of the covered work, then the patent license
you grant is automatically extended to all recipients of the covered
work and works based on it.
A patent license is "discriminatory" if it does not include within
the scope of its coverage, prohibits the exercise of, or is
conditioned on the non-exercise of one or more of the rights that are
specifically granted under this License. You may not convey a covered
work if you are a party to an arrangement with a third party that is
in the business of distributing software, under which you make payment
to the third party based on the extent of your activity of conveying
the work, and under which the third party grants, to any of the
parties who would receive the covered work from you, a discriminatory
patent license (a) in connection with copies of the covered work
conveyed by you (or copies made from those copies), or (b) primarily
for and in connection with specific products or compilations that
contain the covered work, unless you entered into that arrangement,
or that patent license was granted, prior to 28 March 2007.
Nothing in this License shall be construed as excluding or limiting
any implied license or other defenses to infringement that may
otherwise be available to you under applicable patent law.
12. No Surrender of Others' Freedom.
If conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot convey a
covered work so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you may
not convey it at all. For example, if you agree to terms that obligate you
to collect a royalty for further conveying from those to whom you convey
the Program, the only way you could satisfy both those terms and this
License would be to refrain entirely from conveying the Program.
13. Remote Network Interaction; Use with the GNU General Public License.
Notwithstanding any other provision of this License, if you modify the
Program, your modified version must prominently offer all users
interacting with it remotely through a computer network (if your version
supports such interaction) an opportunity to receive the Corresponding
Source of your version by providing access to the Corresponding Source
from a network server at no charge, through some standard or customary
means of facilitating copying of software. This Corresponding Source
shall include the Corresponding Source for any work covered by version 3
of the GNU General Public License that is incorporated pursuant to the
following paragraph.
Notwithstanding any other provision of this License, you have
permission to link or combine any covered work with a work licensed
under version 3 of the GNU General Public License into a single
combined work, and to convey the resulting work. The terms of this
License will continue to apply to the part which is the covered work,
but the work with which it is combined will remain governed by version
3 of the GNU General Public License.
14. Revised Versions of this License.
The Free Software Foundation may publish revised and/or new versions of
the GNU Affero General Public License from time to time. Such new versions
will be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the
Program specifies that a certain numbered version of the GNU Affero General
Public License "or any later version" applies to it, you have the
option of following the terms and conditions either of that numbered
version or of any later version published by the Free Software
Foundation. If the Program does not specify a version number of the
GNU Affero General Public License, you may choose any version ever published
by the Free Software Foundation.
If the Program specifies that a proxy can decide which future
versions of the GNU Affero General Public License can be used, that proxy's
public statement of acceptance of a version permanently authorizes you
to choose that version for the Program.
Later license versions may give you additional or different
permissions. However, no additional obligations are imposed on any
author or copyright holder as a result of your choosing to follow a
later version.
15. Disclaimer of Warranty.
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
16. Limitation of Liability.
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
SUCH DAMAGES.
17. Interpretation of Sections 15 and 16.
If the disclaimer of warranty and limitation of liability provided
above cannot be given local legal effect according to their terms,
reviewing courts shall apply local law that most closely approximates
an absolute waiver of all civil liability in connection with the
Program, unless a warranty or assumption of liability accompanies a
copy of the Program in return for a fee.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
state the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published
by the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
Also add information on how to contact you by electronic and paper mail.
If your software can interact with users remotely through a computer
network, you should also make sure that it provides a way for users to
get its source. For example, if your program is a web application, its
interface could display a "Source" link that leads users to an archive
of the code. There are many ways you could offer source, and different
solutions will be better for different programs; see section 13 for the
specific requirements.
You should also get your employer (if you work as a programmer) or school,
if any, to sign a "copyright disclaimer" for the program, if necessary.
For more information on this, and how to apply and follow the GNU AGPL, see
<https://www.gnu.org/licenses/>.
+20
@@ -0,0 +1,20 @@
# needLe
Fuzzy search engine for small text pieces, with Chinese/Japanese pronunciation support.
Available in [TypeScript](./packages/needle) and [C#](./dotnet). Click the links for detailed documentation.
See also [in-browser demo](https://needle.maigo.dev).
## Packages
| Platform | Package | Install |
|:--------:|:-------:|:-------:|
| Node.js / Browser | [@maigolabs/needle](https://www.npmjs.com/package/@maigolabs/needle) | `pnpm add @maigolabs/needle` |
| .NET Standard 2.0 | [MaigoLabs.NeedLe](https://www.nuget.org/packages/MaigoLabs.NeedLe) | `dotnet add package MaigoLabs.NeedLe` |
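A minimal Node.js usage sketch, based on the demo and bot code in this repository (the dictionary path and sample strings are placeholders; adjust them to your setup):

```ts
import { buildInvertedIndex } from '@maigolabs/needle/indexer';
import { loadInvertedIndex, searchInvertedIndex, highlightSearchResult } from '@maigolabs/needle/searcher';
import { TokenizerBuilder } from '@patdx/kuromoji';
import NodeDictionaryLoader from '@patdx/kuromoji/node';

// Build a kuromoji tokenizer from a local dictionary directory (example path).
const kuromoji = await new TokenizerBuilder({
  loader: new NodeDictionaryLoader({ dic_path: 'node_modules/@patdx/kuromoji/dict' }),
}).build();

// Index a list of small documents (e.g. music names), then load the compressed index for searching.
const documents = ['needLe', 'Tell Your World', 'ハッピーシンセサイザ'];
const invertedIndex = loadInvertedIndex(buildInvertedIndex(documents, { kuromoji }));

// Search the index; reconstruct each result's text from its highlighted parts and show the match ratio.
for (const result of searchInvertedIndex(invertedIndex, 'needle')) {
  const text = highlightSearchResult(result)
    .map(part => (typeof part === 'string' ? part : part.highlight))
    .join('');
  console.log(`${text} (${Math.round(result.matchRatio * 100)}% match)`);
}
```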
## The Name
The word "needle" is from the phrase [Needle in a Haystack](https://en.wikipedia.org/wiki/Needle_in_a_haystack). Normally, searching tasks are finding a small string ("needle") in a large string ("haystack"). However, this project is designed for searching in small strings (specifically, music names) instead of large strings. We are finding needles in needles.
The capitalized "L" is from the music name [needLe](https://projectsekai.fandom.com/wiki/NeedLe).
+12
@@ -0,0 +1,12 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>MaigoLabs :: needLe</title>
</head>
<body>
<div id="root"></div>
<script type="module" src="/src/main.tsx"></script>
</body>
</html>
+28
@@ -0,0 +1,28 @@
{
"name": "@maigolabs/needle-demo",
"version": "1.0.0",
"type": "module",
"scripts": {
"typecheck": "tsc",
"dev": "vite --port 5172",
"build": "tsc -b && vite build"
},
"license": "AGPL-3.0",
"packageManager": "pnpm@10.20.0",
"private": true,
"dependencies": {
"@maigolabs/needle": "workspace:*",
"react": "^19.2.0",
"react-dom": "^19.2.0"
},
"devDependencies": {
"@iconify-json/svg-spinners": "^1.2.4",
"@types/node": "^24.10.1",
"@types/react": "^19.2.5",
"@types/react-dom": "^19.2.3",
"@vitejs/plugin-react": "^5.1.1",
"unocss": "^66.5.12",
"vite": "^7.2.4",
"vite-plugin-top-level-await": "^1.6.0"
}
}
+1
@@ -0,0 +1 @@
/fonts
+168
@@ -0,0 +1,168 @@
import { TokenType } from '@maigolabs/needle/common';
import {
searchInvertedIndex,
highlightSearchResult,
type SearchResult,
} from '@maigolabs/needle/searcher';
import { useState, type FunctionComponent } from 'react';
type Tab = 'search' | 'tokenize';
type AppData = typeof import('./data');
export const Layout: FunctionComponent<{ dataPromise: Promise<AppData> }> = ({ dataPromise }) => {
const [appData, setAppData] = useState<AppData | null>(null);
const [error, setError] = useState<string | null>(null);
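// Resolve the lazily imported data module into state; show the error message instead if loading fails.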
void dataPromise.then(props => setAppData(props)).catch(error => setError((error instanceof Error ? error.stack : undefined) ?? String(error)));
return (
<div className="min-h-screen bg-[#f9f2e0] text-[#8b7355] font-mono selection:bg-[#d4c4b0]/70">
<div className="max-w-200 mx-auto px-4 pt-8 pb-6">
<header className="mb-8">
<h1 className="pb-3 text-2xl text-[#a08060]">MaigoLabs :: needLe</h1>
<div className="pb-4 text-sm">
<p>Fuzzy search engine for small text pieces, with Chinese/Japanese pronunciation support</p>
<p>(Available in TypeScript and C#)</p>
</div>
<div className="flex gap-4 text-sm">
<a href="https://github.com/MaigoLabs/needLe" target="_blank" rel="noopener" className="text-[#b8a890] hover:text-[#8b7355]">[GitHub]</a>
<a href="https://www.npmjs.com/package/@maigolabs/needle" target="_blank" rel="noopener" className="text-[#b8a890] hover:text-[#8b7355]">[NPM]</a>
<a href="https://www.nuget.org/packages/MaigoLabs.NeedLe" target="_blank" rel="noopener" className="text-[#b8a890] hover:text-[#8b7355]">[NuGet]</a>
</div>
</header>
{
appData
? <App appData={appData} />
: error
? <div className="text-sm bg-[#efe5d0] px-4 py-3 rounded-lg whitespace-pre-wrap">{error}</div>
: <div>
<div className="flex flex-row items-center gap-2"><div className="i-svg-spinners:ring-resize" /> Loading...</div>
<div className="mt-6 text-sm bg-[#efe5d0] px-4 py-3 rounded-lg">
<div className="font-bold mb-2">Tips:</div>
<div>This demo loads Kuromoji/OpenCC/pinyin-pro for tokenization and index building.</div>
<div>However, searching on a prebuilt index doesn't require loading any external library/dictionary.</div>
</div>
</div>
}
</div>
</div>
);
};
interface AppProps {
appData: AppData;
}
export const App: FunctionComponent<AppProps> = ({ appData: { kuromoji, createTokenizer, invertedIndex } }) => {
const [input, setInput] = useState('');
const [tab, setTab] = useState<Tab>('search');
const searchResults = tab === 'search' && input.trim()
? searchInvertedIndex(invertedIndex, input).slice(0, 50)
: [];
const tokenizeResults = tab === 'tokenize' && input.trim()
? (() => {
const tokenizer = createTokenizer({ kuromoji });
const tokens = tokenizer.tokenize(input);
const tokenDefs = tokenizer.tokens;
const codePoints = [...input];
return tokens.map(t => {
const def = [...tokenDefs.values()].find(d => d.id === t.id)!;
const original = codePoints.slice(t.start, t.end).join('');
return { ...t, type: def.type, text: def.text, original };
});
})()
: [];
return (
<>
<input
type="text"
value={input}
onChange={e => setInput(e.target.value)}
placeholder={`Type something to ${tab}...`}
className="w-full bg-[#efe5d0] text-[#6b5a48] px-3 py-2 mb-2 outline-none placeholder-[#b8a890] rounded-lg"
/>
<div className="flex gap-4 mb-6 text-sm">
<button
onClick={() => setTab('search')}
className={`bg-transparent border-none cursor-pointer ${tab === 'search' ? 'text-[#6b5a48]' : 'text-[#c0b0a0]'}`}
>
Search
</button>
<button
onClick={() => setTab('tokenize')}
className={`bg-transparent border-none cursor-pointer ${tab === 'tokenize' ? 'text-[#6b5a48]' : 'text-[#c0b0a0]'}`}
>
Tokenize
</button>
</div>
<div className="space-y-2">
{tab === 'search' && searchResults.map((result, i) => (
<SearchResultItem key={i} result={result} input={input} />
))}
{tab === 'tokenize' && tokenizeResults.length > 0 && (
<div className="grid grid-cols-[repeat(auto-fill,minmax(280px,1fr))] gap-1">
{tokenizeResults.map((token, i) => (
<div key={i} className="bg-[#efe5d0] px-3 py-2 text-sm truncate rounded-lg">
<span className="text-[#a08060]">{TokenType[token.type]}: </span>
<span className="text-[#6b5a48]">{JSON.stringify(token.text)}</span>
<span className="text-[#c0b0a0]">{' <- '}</span>
<span className="text-[#8b7355]">{JSON.stringify(token.original)}</span>
<span className="text-[#c8bba8]">{` [${token.start}, ${token.end}]`}</span>
</div>
))}
</div>
)}
{input.trim() && tab === 'search' && searchResults.length === 0 && (
<div className="text-[#b8a890] text-sm">No results.</div>
)}
</div>
</>
);
};
const SearchResultItem: FunctionComponent<{ result: SearchResult; input: string }> = ({ result, input }) => {
const highlighted = highlightSearchResult(result);
const inputCodePoints = [...input];
const stats = [
`${result.rangeCount} range(s)`,
`${Math.round(result.matchRatio * 100)}%`,
result.prefixMatchCount > 0 ? `${result.prefixMatchCount} prefix` : null,
].filter(Boolean).join(', ');
return (
<div className="bg-[#efe5d0] px-3 py-2 text-sm rounded-lg">
<div className="flex gap-2">
<div className="flex-1 truncate">
{highlighted.map((part, i) =>
typeof part === 'string'
? <span key={i} className="text-[#b8a890]">{part}</span>
: <span key={i} className="text-[#5a4a38]">{part.highlight}</span>)}
</div>
<div className="text-[#c8bba8] shrink-0">{stats}</div>
</div>
<div className="grid grid-cols-[repeat(auto-fill,minmax(200px,1fr))] gap-x-2 mt-1">
{result.tokens.map((token, i) => {
const inputText = inputCodePoints.slice(token.inputOffset.start, token.inputOffset.end).join('');
const docText = result.documentCodePoints.slice(token.documentOffset.start, token.documentOffset.end).join('');
return (
<div key={i} className="text-[11px] truncate">
<span className="text-[#b8a890]">{TokenType[token.definition.type]}: </span>
<span className="text-[#8b7355]">{JSON.stringify(inputText)}</span>
<span className="text-[#c8bba8]">{' -> '}</span>
<span className="text-[#6b5a48]">{JSON.stringify(docText)}</span>
{token.isTokenPrefixMatching && <span className="text-[#b8a890]">{' (prefix)'}</span>}
</div>
);
})}
</div>
</div>
);
};
+26
@@ -0,0 +1,26 @@
import { buildInvertedIndex } from '@maigolabs/needle/indexer';
import { loadInvertedIndex } from '@maigolabs/needle/searcher';
import { TokenizerBuilder } from '@patdx/kuromoji';
// The indexer loads OpenCC and pinyin-pro, which are large, so they are kept in data.ts for dynamic importing.
export { createTokenizer } from '@maigolabs/needle/indexer';
const musicNames: string[] = [...new Set(
Object.values(
await (await fetch('https://sekai-world.github.io/sekai-master-db-diff/musics.json')).json(),
).map(music => (music as { title: string }).title),
)];
export const kuromoji = await new TokenizerBuilder({
loader: {
loadArrayBuffer: async (url: string) => {
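// Resolve dictionary files against a jsDelivr CDN package; the '.gz' suffix is dropped to match the files published there.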
url = `https://cdn.jsdelivr.net/npm/@aiktb/kuromoji@1.0.2/dict/${url.replace('.gz', '')}`;
const res = await fetch(url);
if (!res.ok) throw new Error(`Failed to fetch ${url}`);
return await res.arrayBuffer();
},
},
}).build();
export const compressed = buildInvertedIndex(musicNames, { kuromoji });
export const invertedIndex = loadInvertedIndex(compressed);
+12
@@ -0,0 +1,12 @@
import { StrictMode } from 'react';
import { createRoot } from 'react-dom/client';
import { Layout } from './App';
import 'virtual:uno.css';
import '@unocss/reset/tailwind.css';
createRoot(document.getElementById('root')!).render(
<StrictMode>
<Layout dataPromise={import('./data')} />
</StrictMode>,
);
+24
@@ -0,0 +1,24 @@
{
"compilerOptions": {
"target": "ESNext",
"jsx": "preserve",
"lib": ["DOM", "DOM.Iterable", "ESNext", "WebWorker"],
"types": ["vite/client"],
"module": "ESNext",
"moduleResolution": "Bundler",
"noUncheckedIndexedAccess": true,
"resolveJsonModule": true,
"allowJs": true,
"strict": true,
"strictNullChecks": true,
"noEmit": true,
"esModuleInterop": true,
"forceConsistentCasingInFileNames": true,
"isolatedModules": true,
"skipLibCheck": true,
"rootDir": ".",
"outDir": "dist"
},
"include": ["src/**/*.ts", "src/**/*.tsx"],
"exclude": ["dist", "node_modules"]
}
+30
@@ -0,0 +1,30 @@
import { createLocalFontProcessor } from '@unocss/preset-web-fonts/local';
import { defineConfig, presetWind3, presetTypography, presetWebFonts, transformerVariantGroup, transformerDirectives, presetIcons } from 'unocss';
export default defineConfig({
presets: [
presetWind3(),
presetTypography(),
presetIcons({
scale: 1.2,
warn: true,
}),
presetWebFonts({
fonts: {
mono: {
name: 'Maple Mono',
provider: 'fontsource',
},
},
processors: createLocalFontProcessor({
cacheDir: 'node_modules/.cache/unocss/fonts',
fontAssetsDir: 'public/assets/fonts/cache',
fontServeBaseUrl: '/assets/fonts/cache',
}),
}),
],
transformers: [
transformerDirectives(),
transformerVariantGroup(),
],
});
+13
@@ -0,0 +1,13 @@
import { defineConfig } from 'vite'
import UnoCSS from 'unocss/vite'
import react from '@vitejs/plugin-react'
import topLevelAwait from 'vite-plugin-top-level-await'
// https://vite.dev/config/
export default defineConfig({
plugins: [react(), UnoCSS(), topLevelAwait()],
build: {
assetsInlineLimit: 0,
minify: true
},
})
+19
@@ -0,0 +1,19 @@
{
"name": "@maigolabs/needle-playground-bot",
"version": "1.0.0",
"type": "module",
"scripts": {
"start": "tsx src/index.ts",
"typecheck": "tsc"
},
"license": "AGPL-3.0",
"packageManager": "pnpm@10.20.0",
"private": true,
"dependencies": {
"@maigolabs/needle": "workspace:*",
"telegraf": "^4.16.3"
},
"devDependencies": {
"@types/node": "^24.10.4"
}
}
+78
@@ -0,0 +1,78 @@
import fs from 'node:fs';
import path from 'node:path';
import url from 'node:url';
import { TokenType } from '@maigolabs/needle/common';
import { buildInvertedIndex, createTokenizer } from '@maigolabs/needle/indexer';
import { loadInvertedIndex, inspectSearchResult, searchInvertedIndex } from '@maigolabs/needle/searcher';
import { TokenizerBuilder } from '@patdx/kuromoji';
import NodeDictionaryLoader from '@patdx/kuromoji/node';
import { Telegraf } from 'telegraf';
const botToken = process.env.TELEGRAM_BOT_TOKEN!;
const targetChatId = parseInt(process.env.TARGET_CHAT_ID!);
if (!botToken || isNaN(targetChatId)) throw new Error('Missing environment variables TELEGRAM_BOT_TOKEN or TARGET_CHAT_ID');
const bot = new Telegraf(botToken);
const escapeHtml = (s: string) => s.replaceAll('&', '&amp;').replaceAll('<', '&lt;').replaceAll('>', '&gt;');
const commands = await (async () => {
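// Locate the dictionary directory shipped with the installed @patdx/kuromoji package and build a tokenizer from it.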
const kuromojiDictPath = path.resolve(url.fileURLToPath(import.meta.resolve('@patdx/kuromoji')), '..', '..', 'dict');
const kuromoji = await new TokenizerBuilder({ loader: new NodeDictionaryLoader({ dic_path: kuromojiDictPath }) }).build();
const documents = (await fs.promises.readFile('../../example.txt', 'utf-8')).split('\n').filter(line => line.length > 0);
const startBuildInvertedIndex = performance.now();
const compressed = buildInvertedIndex(documents, { kuromoji });
const endBuildInvertedIndex = performance.now();
console.log(`Built inverted index in ${endBuildInvertedIndex - startBuildInvertedIndex}ms`);
const startLoadInvertedIndex = performance.now();
const invertedIndex = loadInvertedIndex(compressed);
const endLoadInvertedIndex = performance.now();
console.log(`Loaded inverted index in ${endLoadInvertedIndex - startLoadInvertedIndex}ms`);
const codify = (text: string) => `<code>${escapeHtml(text)}</code>`;
return {
needle: (text: string) => {
const startSearch = performance.now();
const results = searchInvertedIndex(invertedIndex, text);
const endSearch = performance.now();
const searchDuration = (endSearch - startSearch).toFixed(3);
const showingResults = results.slice(0, 5);
return results.length === 0 ? codify(`No results found after ${searchDuration}ms`) : [
codify(`Search completed in ${searchDuration}ms, showing ${showingResults.length}/${results.length} results:\n`),
...showingResults.map(result => inspectSearchResult(result, true)),
].join('\n').trimEnd();
},
tokenize: (text: string) => {
const startTokenize = performance.now();
const tokenizer = createTokenizer({ kuromoji });
const tokens = tokenizer.tokenize(text);
const tokenDefinitions = [...tokenizer.tokens.values()];
const endTokenize = performance.now();
const tokenizeDuration = (endTokenize - startTokenize).toFixed(3);
return codify(tokens.length === 0 ? `No tokens emitted after ${tokenizeDuration}ms` : [
`Tokenization completed in ${tokenizeDuration}ms, emitted ${tokens.length} tokens:`,
...tokens
.map(token => [tokenDefinitions[token.id]!, token, [...text].slice(token.start, token.end).join('')] as const)
.map(([token, { start, end }, originalPhrase]) => ` ${TokenType[token.type]}: ${JSON.stringify(token.text)} <- ${JSON.stringify(originalPhrase)} [${start}, ${end}]`),
].join('\n'));
},
};
})();
bot.on('message', async ctx => {
const text = 'text' in ctx.message ? ctx.message.text : undefined;
console.log(`${ctx.chat.id ?? 'N/A'}:${ctx.from!.id} ${JSON.stringify(text)}`);
if (ctx.chat.id === targetChatId) {
if (text?.startsWith('/needle ')) {
await ctx.reply(commands.needle(text.slice('/needle '.length)), { parse_mode: 'HTML' });
} else if (text?.startsWith('/tokenize ')) {
await ctx.reply(commands.tokenize(text.slice('/tokenize '.length)), { parse_mode: 'HTML' });
}
}
});
await bot.launch();
void bot.telegram.getMe().then(me => console.log(`Bot logged in as ${me.first_name} (@${me.username})`));
+23
@@ -0,0 +1,23 @@
{
"compilerOptions": {
"target": "ESNext",
"jsx": "preserve",
"lib": ["DOM", "DOM.Iterable", "ESNext", "WebWorker"],
"module": "ESNext",
"moduleResolution": "Bundler",
"noUncheckedIndexedAccess": true,
"resolveJsonModule": true,
"allowJs": true,
"strict": true,
"strictNullChecks": true,
"noEmit": true,
"esModuleInterop": true,
"forceConsistentCasingInFileNames": true,
"isolatedModules": true,
"skipLibCheck": true,
"rootDir": ".",
"outDir": "dist"
},
"include": ["src/**/*.ts"],
"exclude": ["dist", "node_modules"]
}
+371
@@ -0,0 +1,371 @@
# Created by https://www.toptal.com/developers/gitignore/api/git,visualstudio
# Edit at https://www.toptal.com/developers/gitignore?templates=git,visualstudio
### Git ###
# Created by git for backups. To disable backups in Git:
# $ git config --global mergetool.keepBackup false
*.orig
# Created by git when using merge tools for conflicts
*.BACKUP.*
*.BASE.*
*.LOCAL.*
*.REMOTE.*
*_BACKUP_*.txt
*_BASE_*.txt
*_LOCAL_*.txt
*_REMOTE_*.txt
### VisualStudio ###
## Ignore Visual Studio temporary files, build results, and
## files generated by popular Visual Studio add-ons.
##
## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore
# User-specific files
*.rsuser
*.suo
*.user
*.userosscache
*.sln.docstates
# User-specific files (MonoDevelop/Xamarin Studio)
*.userprefs
# Mono auto generated files
mono_crash.*
# Build results
[Dd]ebug/
[Dd]ebugPublic/
[Rr]elease/
[Rr]eleases/
x64/
x86/
[Aa][Rr][Mm]/
[Aa][Rr][Mm]64/
bld/
[Bb]in/
[Oo]bj/
[Ll]og/
[Ll]ogs/
# Visual Studio 2015/2017 cache/options directory
.vs/
# Uncomment if you have tasks that create the project's static files in wwwroot
#wwwroot/
# Visual Studio 2017 auto generated files
Generated\ Files/
# MSTest test Results
[Tt]est[Rr]esult*/
[Bb]uild[Ll]og.*
# NUnit
*.VisualState.xml
TestResult.xml
nunit-*.xml
# Build Results of an ATL Project
[Dd]ebugPS/
[Rr]eleasePS/
dlldata.c
# Benchmark Results
BenchmarkDotNet.Artifacts/
# .NET Core
project.lock.json
project.fragment.lock.json
artifacts/
# StyleCop
StyleCopReport.xml
# Files built by Visual Studio
*_i.c
*_p.c
*_h.h
*.ilk
*.meta
*.obj
*.iobj
*.pch
*.pdb
*.ipdb
*.pgc
*.pgd
*.rsp
*.sbr
*.tlb
*.tli
*.tlh
*.tmp
*.tmp_proj
*_wpftmp.csproj
*.log
*.vspscc
*.vssscc
.builds
*.pidb
*.svclog
*.scc
# Chutzpah Test files
_Chutzpah*
# Visual C++ cache files
ipch/
*.aps
*.ncb
*.opendb
*.opensdf
*.sdf
*.cachefile
*.VC.db
*.VC.VC.opendb
# Visual Studio profiler
*.psess
*.vsp
*.vspx
*.sap
# Visual Studio Trace Files
*.e2e
# TFS 2012 Local Workspace
$tf/
# Guidance Automation Toolkit
*.gpState
# ReSharper is a .NET coding add-in
_ReSharper*/
*.[Rr]e[Ss]harper
*.DotSettings.user
# TeamCity is a build add-in
_TeamCity*
# DotCover is a Code Coverage Tool
*.dotCover
# AxoCover is a Code Coverage Tool
.axoCover/*
!.axoCover/settings.json
# Coverlet is a free, cross platform Code Coverage Tool
coverage*.json
coverage*.xml
coverage*.info
# Visual Studio code coverage results
*.coverage
*.coveragexml
# NCrunch
_NCrunch_*
.*crunch*.local.xml
nCrunchTemp_*
# MightyMoose
*.mm.*
AutoTest.Net/
# Web workbench (sass)
.sass-cache/
# Installshield output folder
[Ee]xpress/
# DocProject is a documentation generator add-in
DocProject/buildhelp/
DocProject/Help/*.HxT
DocProject/Help/*.HxC
DocProject/Help/*.hhc
DocProject/Help/*.hhk
DocProject/Help/*.hhp
DocProject/Help/Html2
DocProject/Help/html
# Click-Once directory
publish/
# Publish Web Output
*.[Pp]ublish.xml
*.azurePubxml
# Note: Comment the next line if you want to checkin your web deploy settings,
# but database connection strings (with potential passwords) will be unencrypted
*.pubxml
*.publishproj
# Microsoft Azure Web App publish settings. Comment the next line if you want to
# checkin your Azure Web App publish settings, but sensitive information contained
# in these scripts will be unencrypted
PublishScripts/
# NuGet Packages
*.nupkg
# NuGet Symbol Packages
*.snupkg
# Uncomment if necessary however generally it will be regenerated when needed
#!**/[Pp]ackages/repositories.config
# NuGet v3's project.json files produces more ignorable files
*.nuget.props
*.nuget.targets
# Microsoft Azure Build Output
csx/
*.build.csdef
# Microsoft Azure Emulator
ecf/
rcf/
# Windows Store app package directories and files
AppPackages/
BundleArtifacts/
Package.StoreAssociation.xml
_pkginfo.txt
*.appx
*.appxbundle
*.appxupload
# Visual Studio cache files
# files ending in .cache can be ignored
*.[Cc]ache
# but keep track of directories ending in .cache
!?*.[Cc]ache/
# Others
ClientBin/
~$*
*~
*.dbmdl
*.dbproj.schemaview
*.jfm
*.pfx
*.publishsettings
orleans.codegen.cs
# Including strong name files can present a security risk
# (https://github.com/github/gitignore/pull/2483#issue-259490424)
#*.snk
# Since there are multiple workflows, uncomment next line to ignore bower_components
# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
#bower_components/
# RIA/Silverlight projects
Generated_Code/
# Backup & report files from converting an old project file
# to a newer Visual Studio version. Backup files are not needed,
# because we have git ;-)
_UpgradeReport_Files/
Backup*/
UpgradeLog*.XML
UpgradeLog*.htm
ServiceFabricBackup/
*.rptproj.bak
# SQL Server files
*.mdf
*.ldf
*.ndf
# Business Intelligence projects
*.rdl.data
*.bim.layout
*.bim_*.settings
*.rptproj.rsuser
*- [Bb]ackup.rdl
*- [Bb]ackup ([0-9]).rdl
*- [Bb]ackup ([0-9][0-9]).rdl
# Microsoft Fakes
FakesAssemblies/
# GhostDoc plugin setting file
*.GhostDoc.xml
# Node.js Tools for Visual Studio
.ntvs_analysis.dat
node_modules/
# Visual Studio 6 build log
*.plg
# Visual Studio 6 workspace options file
*.opt
# Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
*.vbw
# Visual Studio LightSwitch build output
**/*.HTMLClient/GeneratedArtifacts
**/*.DesktopClient/GeneratedArtifacts
**/*.DesktopClient/ModelManifest.xml
**/*.Server/GeneratedArtifacts
**/*.Server/ModelManifest.xml
_Pvt_Extensions
# Paket dependency manager
.paket/paket.exe
paket-files/
# FAKE - F# Make
.fake/
# CodeRush personal settings
.cr/personal
# Python Tools for Visual Studio (PTVS)
__pycache__/
*.pyc
# Cake - Uncomment if you are using it
# tools/**
# !tools/packages.config
# Tabs Studio
*.tss
# Telerik's JustMock configuration file
*.jmconfig
# BizTalk build output
*.btp.cs
*.btm.cs
*.odx.cs
*.xsd.cs
# OpenCover UI analysis results
OpenCover/
# Azure Stream Analytics local run output
ASALocalRun/
# MSBuild Binary and Structured Log
*.binlog
# NVidia Nsight GPU debugger configuration file
*.nvuser
# MFractors (Xamarin productivity tool) working folder
.mfractor/
# Local History for Visual Studio
.localhistory/
# BeatPulse healthcheck temp database
healthchecksdb
# Backup folder for Package Reference Convert tool in Visual Studio 2017
MigrationBackup/
# Ionide (cross platform F# VS Code tools) working folder
.ionide/
# End of https://www.toptal.com/developers/gitignore/api/git,visualstudio
+31
View File
@@ -0,0 +1,31 @@
<Project>
<PropertyGroup>
<TargetFramework>netstandard2.0</TargetFramework>
<PreserveCompilationContext>true</PreserveCompilationContext>
<LangVersion>14</LangVersion>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<NoWarn>CA1822</NoWarn>
<ProjectName>MaigoLabs.NeedLe</ProjectName>
<VSTestLogger>console%3Bverbosity=detailed</VSTestLogger>
</PropertyGroup>
<PropertyGroup>
<IsPackable>false</IsPackable>
<Version>1.0.0</Version>
<Authors>Menci</Authors>
<Description>Fuzzy search engine for small text pieces, with Chinese/Japanese pronunciation support</Description>
<PackageLicenseExpression>AGPL-3.0-only</PackageLicenseExpression>
<RepositoryUrl>https://github.com/MaigoLabs/needLe</RepositoryUrl>
<RepositoryType>git</RepositoryType>
<PackageProjectUrl>https://github.com/MaigoLabs/needLe</PackageProjectUrl>
<PackageTags>search;fuzzy;cjk;chinese;japanese;pinyin;romaji</PackageTags>
<PackageReadmeFile>README.md</PackageReadmeFile>
</PropertyGroup>
<ItemGroup>
<None Include="..\README.md" Pack="true" PackagePath="\" />
</ItemGroup>
</Project>
+19
View File
@@ -0,0 +1,19 @@
<Project>
<PropertyGroup>
<ManagePackageVersionsCentrally>true</ManagePackageVersionsCentrally>
</PropertyGroup>
<ItemGroup>
<PackageVersion Include="Microsoft.NET.Test.Sdk" Version="18.0.1" />
<PackageVersion Include="coverlet.collector" Version="6.0.4" />
<PackageVersion Include="xunit" Version="2.9.3" />
<PackageVersion Include="xunit.runner.visualstudio" Version="3.1.5" />
</ItemGroup>
<ItemGroup>
<PackageVersion Include="DotNetCampus.LatestCSharpFeatures" Version="13.0.1" />
<PackageVersion Include="hyjiacan.pinyin4net" Version="4.1.1" />
<PackageVersion Include="MeCab.DotNet" Version="1.2.0" />
<PackageVersion Include="MyNihongo.KanaConverter" Version="1.0.5" />
<PackageVersion Include="OpenccNetLib" Version="1.4.0" />
<PackageVersion Include="Telegram.Bot" Version="22.5.0" />
</ItemGroup>
</Project>
@@ -0,0 +1,45 @@
namespace MaigoLabs.NeedLe.Common;
// Global normalization applied to any query input and to documents.
public static class CommonNormalization
{
public static int NormalizeCodePoint(int codePoint)
{
// Fullwidth ASCII -> Halfwidth ASCII
if (codePoint >= 0xFF01 && codePoint <= 0xFF5E) return ToLowerCaseAscii(codePoint - 0xFEE0);
// Fullwidth space -> Halfwidth space
else if (codePoint == /* ' ' */ 0x3000) return ' ';
// Halfwidth kana (U+FF66 - U+FF9D) -> Fullwidth kana
else if (codePoint >= 0xFF66 && codePoint <= 0xFF9D) return HALF_TO_FULL_KANA.TryGetValue(codePoint, out var value) ? value : codePoint;
else if (codePoint == /* '。' */ 0xFF61) return '。';
else if (codePoint == /* '「' */ 0xFF62) return '「';
else if (codePoint == /* '」' */ 0xFF63) return '」';
else if (codePoint == /* '、' */ 0xFF64) return '、';
else if (codePoint == /* '・' */ 0xFF65) return '・';
else if (codePoint == /* '゙' */ 0xFF9E || codePoint == /* '゛' */ 0x309B) return 0x3099; // -> COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
else if (codePoint == /* '゚' */ 0xFF9F || codePoint == /* '゜' */ 0x309C) return 0x309A; // -> COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
else return ToLowerCaseAscii(codePoint);
}
private static readonly Dictionary<int, int> HALF_TO_FULL_KANA = new Dictionary<int, int> {
['ｦ'] = 'ヲ', ['ｧ'] = 'ァ', ['ｨ'] = 'ィ', ['ｩ'] = 'ゥ', ['ｪ'] = 'ェ', ['ｫ'] = 'ォ',
['ｬ'] = 'ャ', ['ｭ'] = 'ュ', ['ｮ'] = 'ョ', ['ｯ'] = 'ッ',
['ｰ'] = 'ー',
['ｱ'] = 'ア', ['ｲ'] = 'イ', ['ｳ'] = 'ウ', ['ｴ'] = 'エ', ['ｵ'] = 'オ',
['ｶ'] = 'カ', ['ｷ'] = 'キ', ['ｸ'] = 'ク', ['ｹ'] = 'ケ', ['ｺ'] = 'コ',
['ｻ'] = 'サ', ['ｼ'] = 'シ', ['ｽ'] = 'ス', ['ｾ'] = 'セ', ['ｿ'] = 'ソ',
['ﾀ'] = 'タ', ['ﾁ'] = 'チ', ['ﾂ'] = 'ツ', ['ﾃ'] = 'テ', ['ﾄ'] = 'ト',
['ﾅ'] = 'ナ', ['ﾆ'] = 'ニ', ['ﾇ'] = 'ヌ', ['ﾈ'] = 'ネ', ['ﾉ'] = 'ノ',
['ﾊ'] = 'ハ', ['ﾋ'] = 'ヒ', ['ﾌ'] = 'フ', ['ﾍ'] = 'ヘ', ['ﾎ'] = 'ホ',
['ﾏ'] = 'マ', ['ﾐ'] = 'ミ', ['ﾑ'] = 'ム', ['ﾒ'] = 'メ', ['ﾓ'] = 'モ',
['ﾔ'] = 'ヤ', ['ﾕ'] = 'ユ', ['ﾖ'] = 'ヨ',
['ﾗ'] = 'ラ', ['ﾘ'] = 'リ', ['ﾙ'] = 'ル', ['ﾚ'] = 'レ', ['ﾛ'] = 'ロ',
['ﾜ'] = 'ワ', ['ﾝ'] = 'ン',
};
public static int ToLowerCaseAscii(int codePoint) => codePoint >= 0x41 && codePoint <= 0x5A ? codePoint + 0x20 : codePoint;
public static bool IsHiraganaRange(int codePoint) => (codePoint >= 0x3041 && codePoint <= 0x3096) || (codePoint >= 0x309D && codePoint <= 0x309E);
public static int ToKatakana(int codePoint) => IsHiraganaRange(codePoint) ? codePoint + 0x60 : codePoint;
public static string ToKatakana(string text) => string.Concat(text.Select(c => (char)ToKatakana(c)));
}
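A minimal usage sketch of the normalization helpers above (illustrative, not part of the commit; the expected values follow directly from the mappings in NormalizeCodePoint and ToKatakana):

using MaigoLabs.NeedLe.Common;

// Fullwidth 'Ａ' (U+FF21) is shifted into ASCII and lowercased.
int normalized = CommonNormalization.NormalizeCodePoint(0xFF21); // 0x61, i.e. 'a'

// Hiragana is mapped to katakana via the fixed +0x60 offset.
string katakana = CommonNormalization.ToKatakana("さば"); // "サバ"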
@@ -0,0 +1,21 @@
namespace MaigoLabs.NeedLe.Common;
public static class CommonUtils
{
public static bool IsWhitespace(int codePoint) =>
codePoint == 0x0009 /* \t */ ||
codePoint == 0x000A /* \n */ ||
codePoint == 0x000B /* Vertical Tab */ ||
codePoint == 0x000C /* \f */ ||
codePoint == 0x000D /* \r */ ||
codePoint == 0x0020 /* Space */ ||
codePoint == 0x0085 /* Next Line (NEL) */ ||
codePoint == 0x00A0 /* No-Break Space */ ||
codePoint == 0x1680 /* Ogham Space Mark */ ||
codePoint >= 0x2000 && codePoint <= 0x200A ||
codePoint == 0x2028 /* Line Separator */ ||
codePoint == 0x2029 /* Paragraph Separator */ ||
codePoint == 0x202F /* Narrow No-Break Space */ ||
codePoint == 0x205F /* Medium Mathematical Space */ ||
codePoint == 0x3000 /* Ideographic Space */;
}
@@ -0,0 +1,25 @@
using System.Text;
namespace MaigoLabs.NeedLe.Common.Extensions;
public static class UnicodeExtensions
{
public static IEnumerable<int> ToCodePoints(this string s)
{
for (int i = 0; i < s.Length; i++)
{
int codePoint = char.ConvertToUtf32(s, i);
if (codePoint > 0xffff) i++;
yield return codePoint;
}
}
public static StringBuilder ToUtf32StringBuilder(this IEnumerable<int> codePoints)
{
var sb = new StringBuilder();
foreach (var codePoint in codePoints) sb.Append(char.ConvertFromUtf32(codePoint));
return sb;
}
public static string ToUtf32String(this IEnumerable<int> codePoints) => ToUtf32StringBuilder(codePoints).ToString();
}
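A small round-trip sketch for the code-point extensions above (illustrative; assumes System.Linq is in scope):

using MaigoLabs.NeedLe.Common.Extensions;

// "𩸽" (U+29E3D) is one code point but two UTF-16 chars, so the
// code-point view yields 5 elements for this 6-char string.
int[] codePoints = "𩸽の塩焼き".ToCodePoints().ToArray();
string roundTripped = codePoints.ToUtf32String(); // equals the original string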
@@ -0,0 +1,19 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>netstandard2.0</TargetFramework>
<OutputType>Library</OutputType>
<RootNamespace>$(ProjectName).Common</RootNamespace>
<AssemblyName>$(RootNamespace)</AssemblyName>
</PropertyGroup>
<PropertyGroup>
<IsPackable>true</IsPackable>
<PackageId>$(RootNamespace)</PackageId>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="DotNetCampus.LatestCSharpFeatures" PrivateAssets="all" />
</ItemGroup>
</Project>
+33
View File
@@ -0,0 +1,33 @@
namespace MaigoLabs.NeedLe.Common;
public class TrieNode
{
public required TrieNode? Parent { get; set; }
public required Dictionary<int, TrieNode> Children { get; set; } // Unicode code point -> child node
public required List<int> TokenIds { get; set; }
public required List<int> SubTreeTokenIds { get; set; } // Empty on root.
}
public static class TrieNodeExtensions
{
public static TrieNode? TraverseStep(this TrieNode? node, int codePoint, bool isIgnorable = false) =>
(node?.Children.TryGetValue(codePoint, out var child) ?? false)
? child
: isIgnorable ? node : null;
public static TrieNode? Traverse(this TrieNode? node, int[] codePoints, bool isIgnorable = false)
{
if (node == null) return null;
foreach (var codePoint in codePoints)
{
node = node?.TraverseStep(codePoint, isIgnorable);
if (node == null) return null;
}
return node;
}
public static List<int> GetTokenIds(this TrieNode? node, bool includeSubTree = false) =>
(includeSubTree ? node?.SubTreeTokenIds : node?.TokenIds) ?? [];
public static bool IsTokenExactMatch(this TrieNode? node, int tokenId) => node?.TokenIds.Contains(tokenId) ?? false;
}
@@ -0,0 +1,20 @@
namespace MaigoLabs.NeedLe.Common.Types;
#pragma warning disable IDE1006 // Naming rule violation
// For compatibility with TypeScript, we use camelCase property names here.
public class CompressedInvertedIndex
{
public required string[] documents { get; set; }
public required int[] tokenTypes { get; set; } // Use int values here instead of TokenType enum to avoid JSON serialization issues.
public required List<int[]>[] tokenReferences { get; set; } // tokenId -> [documentId, start1, end1, start2, end2, ...]
public required CompressedInvertedIndexTries tries { get; set; }
}
public class CompressedInvertedIndexTries
{
public required int[] romaji { get; set; }
public required int[] kana { get; set; }
public required int[] other { get; set; }
}
@@ -0,0 +1,9 @@
namespace MaigoLabs.NeedLe.Common.Types;
public class OffsetSpan
{
public required int Start { get; init; }
public required int End { get; init; }
public int Length => End - Start;
}
@@ -0,0 +1,9 @@
namespace MaigoLabs.NeedLe.Common.Types;
public class TokenDefinition
{
public required int Id { get; set; }
public required TokenType Type { get; set; }
public required string Text { get; set; }
public required int CodePointLength { get; set; }
}
@@ -0,0 +1,10 @@
namespace MaigoLabs.NeedLe.Common.Types;
public enum TokenType
{
Raw,
Kana,
Romaji,
Han,
Pinyin,
}
@@ -0,0 +1,80 @@
using MaigoLabs.NeedLe.Common.Extensions;
using OpenccNetLib;
namespace MaigoLabs.NeedLe.Indexer.Han;
public class HanVariantProvider
{
private readonly Dictionary<int, int[]> EXCHANGE_MAP;
public HanVariantProvider(DictWithMaxLength[]? dicts = null)
{
dicts ??=
[
DictionaryLib.Provider.hk_variants,
DictionaryLib.Provider.hk_variants_rev,
DictionaryLib.Provider.jp_variants,
DictionaryLib.Provider.jp_variants_rev,
DictionaryLib.Provider.st_characters,
DictionaryLib.Provider.ts_characters,
DictionaryLib.Provider.tw_variants,
DictionaryLib.Provider.tw_variants_rev,
];
EXCHANGE_MAP = BuildHanExchangeMap(dicts);
}
private Dictionary<int, int[]> BuildHanExchangeMap(DictWithMaxLength[] dicts)
{
var unionFindSet = new UnionFindSet();
foreach (var dict in dicts) foreach (var item in dict.Dict)
{
var from = item.Key.ToCodePoints().ToArray();
var to = item.Value.ToCodePoints().ToArray();
if (from.Length != 1 || to.Length != 1) continue;
unionFindSet.Union(from[0], to[0]);
}
var variants = new Dictionary<int, List<int>>();
foreach (var x in unionFindSet.Keys)
{
var parent = unionFindSet.Find(x);
if (!variants.TryGetValue(parent, out var list)) variants[parent] = list = [];
if (x != parent) variants[x] = list;
list.Add(x);
}
return variants.ToDictionary(item => item.Key, item => item.Value.OrderBy(x => x).ToArray());
}
// https://github.com/google/re2/blob/e7aec5985072c1dbe735add802653ef4b36c231a/re2/unicode_groups.cc#L5590-L5615
private static readonly (int Min, int Max)[] RE2_SCRIPT_HAN_RANGES =
[
// Han_range16
(11904, 11929),
(11931, 12019),
(12032, 12245),
(12293, 12293),
(12295, 12295),
(12321, 12329),
(12344, 12347),
(13312, 19903),
(19968, 40959),
(63744, 64109),
(64112, 64217),
// Han_range32
(94178, 94179),
(94192, 94193),
(131072, 173791),
(173824, 177977),
(177984, 178205),
(178208, 183969),
(183984, 191456),
(191472, 192093),
(194560, 195101),
(196608, 201546),
(201552, 205743),
];
public static bool IsHanCharacter(int codePoint) => RE2_SCRIPT_HAN_RANGES.Any(range => codePoint >= range.Min && codePoint <= range.Max);
public int[] GetHanVariants(int codePoint) => EXCHANGE_MAP.TryGetValue(codePoint, out var variants)
? variants
: IsHanCharacter(codePoint) ? [codePoint] : [];
}
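A usage sketch for the variant provider (illustrative; the exact variant sets depend on the OpenCC dictionaries loaded above):

var provider = new HanVariantProvider();

// All characters that the union-find grouped together with '国'
// (e.g. 國/囯, dictionary-dependent), sorted by code point.
int[] variants = provider.GetHanVariants('国');

bool isHan = HanVariantProvider.IsHanCharacter('漢'); // true, falls in 19968..40959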
@@ -0,0 +1,19 @@
using hyjiacan.py4n;
namespace MaigoLabs.NeedLe.Indexer.Han;
public static class PinyinHelper
{
private static readonly string[] PINYIN_INITIALS = ["b", "p", "m", "f", "d", "t", "n", "l", "g", "k", "h", "j", "q", "x", "zh", "ch", "sh", "r", "z", "c", "s", "y", "w"];
private static readonly Dictionary<string, string> PINYIN_FINALS_FUZZY_MAP = new() { ["ang"] = "an", ["eng"] = "en", ["ing"] = "in" };
public static IEnumerable<string> GetPinyinCandidates(int codePoint) => codePoint < char.MinValue || codePoint > char.MaxValue || !PinyinUtil.IsHanzi((char)codePoint) ? [] :
Pinyin4Net.GetPinyin((char)codePoint, PinyinFormat.LOWERCASE | PinyinFormat.WITHOUT_TONE).Where(pinyin => pinyin.Length > 0).SelectMany(pinyin =>
{
var initial = PINYIN_INITIALS.FirstOrDefault(initial => pinyin.StartsWith(initial));
var initialAlphabet = initial != null ? initial[..1] : pinyin[..1];
var fuzzySuffix = pinyin.Length < 3 ? null : pinyin[^3..];
var fuzzyPinyin = fuzzySuffix != null && PINYIN_FINALS_FUZZY_MAP.TryGetValue(fuzzySuffix, out var fuzzySuffixTarget) ? pinyin[..^3] + fuzzySuffixTarget : null;
return new string?[] { pinyin, initial, initialAlphabet, fuzzyPinyin }.OfType<string>();
}).Distinct();
}
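A sketch of the candidate expansion above (illustrative; readings come from pinyin4net, so exact outputs depend on that library's data):

// For '中' (read "zhong") the code above yields the full syllable, its
// initial, and the initial's first letter: "zhong", "zh", "z".
// No fuzzy form is added because "ong" is not in PINYIN_FINALS_FUZZY_MAP.
var candidates = PinyinHelper.GetPinyinCandidates('中').ToList();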
@@ -0,0 +1,33 @@
namespace MaigoLabs.NeedLe.Indexer.Han;
public class UnionFindSet
{
private Dictionary<int, int> Parent { get; set; } = [];
private Dictionary<int, int> Rank { get; set; } = [];
public IEnumerable<int> Keys => Parent.Keys;
public int Find(int x)
{
if (!Parent.TryGetValue(x, out var parent)) return Parent[x] = x;
else if (x == parent) return x;
else return Parent[x] = Find(parent);
}
public void Union(int x, int y)
{
x = Find(x);
y = Find(y);
if (x == y) return;
int rankX = GetRank(x), rankY = GetRank(y);
if (rankX < rankY) Parent[x] = y;
else if (rankX > rankY) Parent[y] = x;
else
{
Parent[y] = x;
Rank[x] = rankX + 1;
}
}
private int GetRank(int x) => !Rank.TryGetValue(x, out var rank) ? 0 : rank;
}
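A minimal sketch of how the indexer uses this structure to group variant characters (illustrative):

var ufs = new UnionFindSet();
ufs.Union('国', '國');
ufs.Union('國', '囯');

// All three characters now share a single representative.
bool sameGroup = ufs.Find('国') == ufs.Find('囯'); // true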
@@ -0,0 +1,57 @@
using MaigoLabs.NeedLe.Common;
using MaigoLabs.NeedLe.Common.Extensions;
using MaigoLabs.NeedLe.Common.Types;
using MaigoLabs.NeedLe.Indexer.Japanese;
using MaigoLabs.NeedLe.Indexer.Trie;
namespace MaigoLabs.NeedLe.Indexer;
public static class InvertedIndexBuilder
{
private static TrieNode BuildTypedTrie(IEnumerable<TokenDefinition> tokenDefinitions, Func<TokenType, bool> typePredicate) =>
TrieBuilder.BuildTrie(tokenDefinitions
.Where(token => typePredicate(token.Type))
.Select(token => (token.Id, CodePoints: token.Text.ToCodePoints())));
public static CompressedInvertedIndex BuildInvertedIndex(string[] documents, TokenizerOptions? tokenizerOptions = null)
{
var tokenizer = new Tokenizer(tokenizerOptions);
var documentTokens = documents.Select(tokenizer.Tokenize).ToArray();
var tokenDefinitions = tokenizer.Tokens.Values;
var romajiRoot = BuildTypedTrie(tokenDefinitions, type => type == TokenType.Romaji);
var kanaRoot = BuildTypedTrie(tokenDefinitions, type => type == TokenType.Kana);
var otherRoot = BuildTypedTrie(tokenDefinitions, type => type != TokenType.Romaji && type != TokenType.Kana);
TrieBuilder.GraftTriePaths(romajiRoot, JapaneseNormalization.NORMALIZE_RULES_ROMAJI_CODEPOINTS);
TrieBuilder.GraftTriePaths(kanaRoot, JapaneseNormalization.NORMALIZE_RULES_KANA_DAKUTEN_CODEPOINTS);
var invertedIndex = new CompressedInvertedIndex
{
documents = documents,
tokenTypes = [.. tokenDefinitions.Select(token => (int)token.Type)],
tokenReferences = [.. tokenDefinitions.Select(_ => new List<int[]>())],
tries = new CompressedInvertedIndexTries
{
romaji = TrieSerializer.Serialize(romajiRoot),
kana = TrieSerializer.Serialize(kanaRoot),
other = TrieSerializer.Serialize(otherRoot),
},
};
for (var documentId = 0; documentId < documents.Length; documentId++)
{
var tokens = documentTokens[documentId];
var tokenOccurrences = new Dictionary<int, List<int>>();
foreach (var token in tokens)
{
if (!tokenOccurrences.TryGetValue(token.Id, out var occurrences)) tokenOccurrences[token.Id] = occurrences = [];
occurrences.Add(token.Start);
occurrences.Add(token.End);
}
foreach (var (tokenId, occurrences) in tokenOccurrences)
{
invertedIndex.tokenReferences[tokenId].Add([documentId, .. occurrences]);
}
}
return invertedIndex;
}
}
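An end-to-end indexing sketch (illustrative; the default TranscriptionProvider opens a MeCab system dictionary via mecabrc, so one must be available on the machine):

string[] documents = ["サバの味噌煮", "青椒肉絲"];
CompressedInvertedIndex index = InvertedIndexBuilder.BuildInvertedIndex(documents);

// The compressed index is plain data, so it can be persisted as JSON;
// the camelCase property names keep it compatible with the TypeScript side.
string json = System.Text.Json.JsonSerializer.Serialize(index);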
@@ -0,0 +1,69 @@
using MaigoLabs.NeedLe.Common.Extensions;
namespace MaigoLabs.NeedLe.Indexer.Japanese;
public static class JapaneseNormalization
{
public delegate string Normalizer(string text);
public static Normalizer CreateNormalizer(Dictionary<string, string> rules) => text =>
{
while (true)
{
var beforeCurrentIteration = text;
foreach (var (from, to) in rules) text = text.Replace(from, to);
if (text == beforeCurrentIteration) break;
}
return text;
};
public static IEnumerable<(int[] From, int[] To)> ToCodePointPairs(Dictionary<string, string> rules) =>
rules.Select(rule => (From: rule.Key.ToCodePoints().ToArray(), To: rule.Value.ToCodePoints().ToArray()));
public static readonly Dictionary<string, string> NORMALIZE_RULES_ROMAJI = new()
{
// Remove all long vowels (sa-ba- -> saba)
["-"] = "",
// Collapse consecutive vowels
["aa"] = "a",
["ii"] = "i",
["uu"] = "u",
["ee"] = "e",
["oo"] = "o",
["ou"] = "o",
// mb/mp/mm -> nb/np/nm (shimbun -> shinbun)
["mb"] = "nb",
["mp"] = "np",
["mm"] = "nm",
// Others
["sha"] = "sya",
["tsu"] = "tu",
["chi"] = "ti",
["shi"] = "si",
["ji"] = "zi",
};
public static readonly IEnumerable<(int[] From, int[] To)> NORMALIZE_RULES_ROMAJI_CODEPOINTS = ToCodePointPairs(NORMALIZE_RULES_ROMAJI);
public static readonly Normalizer NormalizeRomaji = CreateNormalizer(NORMALIZE_RULES_ROMAJI);
public static readonly Dictionary<string, string> NORMALIZE_RULES_KANA_DAKUTEN = new()
{
["う\u3099"] = "ゔ",
["か\u3099"] = "が", ["き\u3099"] = "ぎ", ["く\u3099"] = "ぐ", ["け\u3099"] = "げ", ["こ\u3099"] = "ご",
["さ\u3099"] = "ざ", ["し\u3099"] = "じ", ["す\u3099"] = "ず", ["せ\u3099"] = "ぜ", ["そ\u3099"] = "ぞ",
["た\u3099"] = "だ", ["ち\u3099"] = "ぢ", ["つ\u3099"] = "づ", ["て\u3099"] = "で", ["と\u3099"] = "ど",
["は\u3099"] = "ば", ["ひ\u3099"] = "び", ["ふ\u3099"] = "ぶ", ["へ\u3099"] = "べ", ["ほ\u3099"] = "ぼ",
["は\u309A"] = "ぱ", ["ひ\u309A"] = "ぴ", ["ふ\u309A"] = "ぷ", ["へ\u309A"] = "ぺ", ["ほ\u309A"] = "ぽ",
["ゝ\u3099"] = "ゞ",
["ウ\u3099"] = "ヴ",
["カ\u3099"] = "ガ", ["キ\u3099"] = "ギ", ["ク\u3099"] = "グ", ["ケ\u3099"] = "ゲ", ["コ\u3099"] = "ゴ",
["サ\u3099"] = "ザ", ["シ\u3099"] = "ジ", ["ス\u3099"] = "ズ", ["セ\u3099"] = "ゼ", ["ソ\u3099"] = "ゾ",
["タ\u3099"] = "ダ", ["チ\u3099"] = "ヂ", ["ツ\u3099"] = "ヅ", ["テ\u3099"] = "デ", ["ト\u3099"] = "ド",
["ハ\u3099"] = "バ", ["ヒ\u3099"] = "ビ", ["フ\u3099"] = "ブ", ["ヘ\u3099"] = "ベ", ["ホ\u3099"] = "ボ",
["ハ\u309A"] = "パ", ["ヒ\u309A"] = "ピ", ["フ\u309A"] = "プ", ["ヘ\u309A"] = "ペ", ["ホ\u309A"] = "ポ",
["ワ\u3099"] = "ヷ", ["ヰ\u3099"] = "ヸ", ["ヱ\u3099"] = "ヹ", ["ヲ\u3099"] = "ヺ",
["ヽ\u3099"] = "ヾ",
};
public static readonly IEnumerable<(int[] From, int[] To)> NORMALIZE_RULES_KANA_DAKUTEN_CODEPOINTS = ToCodePointPairs(NORMALIZE_RULES_KANA_DAKUTEN);
public static readonly Normalizer NormalizeKanaDakuten = CreateNormalizer(NORMALIZE_RULES_KANA_DAKUTEN);
}
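A quick sketch of how the rule tables compose (illustrative; each table is applied repeatedly until the text stops changing):

// Both "ou" sequences collapse, per NORMALIZE_RULES_ROMAJI.
string romaji = JapaneseNormalization.NormalizeRomaji("toukyou"); // "tokyo"

// A combining dakuten (U+3099) is folded into the precomposed kana.
string kana = JapaneseNormalization.NormalizeKanaDakuten("し\u3099"); // "じ"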
@@ -0,0 +1,52 @@
using MaigoLabs.NeedLe.Indexer.Han;
using MyNihongo.KanaConverter;
namespace MaigoLabs.NeedLe.Indexer.Japanese;
public static class JapaneseUtils
{
public static bool IsMaybeJapanese(int codePoint) =>
HanVariantProvider.IsHanCharacter(codePoint) ||
IsKana(codePoint) ||
IsJapaneseSoundMark(codePoint) ||
codePoint == 0x3005 || codePoint == 0x3006 || codePoint == 0x30FC;
// See also Common/Normalization.cs
public static bool IsJapaneseSoundMark(int codePoint) => codePoint == 0x3099 || codePoint == 0x309A;
public static string StripJapaneseSoundMarks(string text) => string.Concat(text.Where(codePoint => !IsJapaneseSoundMark(codePoint)));
public static bool IsKana(int codePoint) => (codePoint >= 0x3041 && codePoint <= 0x309F) || (codePoint >= 0x30A0 && codePoint <= 0x30FF);
private static readonly int[] KANAS_CANNOT_BE_FIRST =
[
'ァ', 'ィ', 'ゥ', 'ェ', 'ォ',
'ぁ', 'ぃ', 'ぅ', 'ぇ', 'ぉ',
'ャ', 'ュ', 'ョ',
'ゃ', 'ゅ', 'ょ',
'ヮ', 'ゎ',
'ㇰ', 'ㇱ', 'ㇲ', 'ㇳ', 'ㇴ', 'ㇵ', 'ㇶ', 'ㇷ', 'ㇸ', 'ㇹ', 'ㇺ', 'ㇻ', 'ㇼ', 'ㇽ', 'ㇾ', 'ㇿ',
'ー',
];
private static readonly int[] KANAS_CANNOT_BE_LAST =
[
'ッ', 'っ'
];
public static string ToRomajiStrictly(string kanaText)
{
if (kanaText.Length == 0) return "";
if (KANAS_CANNOT_BE_FIRST.Contains(kanaText[0])) return "";
if (KANAS_CANNOT_BE_LAST.Contains(kanaText[^1])) return "";
string romaji;
try { romaji = kanaText.ToRomaji(); }
catch { return ""; }
if (!romaji.All(c => c is >= 'a' and <= 'z')) return "";
return romaji;
}
public static bool IsValidJapanesePhrase(ReadOnlySpan<int> codePoints, int start, int length) =>
// Skip splittings that cause sound marks to occur in the first position of a phrase
!IsJapaneseSoundMark(codePoints[start]) && (start + length == codePoints.Length || !IsJapaneseSoundMark(codePoints[start + length]));
public static bool IsValidJapanesePhrase(ReadOnlyMemory<int> codePoints, int start, int length) => IsValidJapanesePhrase(codePoints.Span, start, length);
}
@@ -0,0 +1,105 @@
using System.Runtime.InteropServices;
using MaigoLabs.NeedLe.Common;
using MaigoLabs.NeedLe.Common.Extensions;
using MeCab;
using MeCab.Core;
namespace MaigoLabs.NeedLe.Indexer.Japanese;
public class Transcription
{
public required int Start { get; set; }
public required int Length { get; set; }
public required string[] Transcriptions { get; set; }
}
public delegate IEnumerable<Transcription> TranscriptionEnumerator(ReadOnlyMemory<int> codePoints);
public delegate bool IsValidPhraseDelegate(ReadOnlyMemory<int> codePoints, int start, int length);
public delegate HashSet<string> GetAllTranscriptionsDelegate(string phrase);
public class TranscriptionProvider
{
public MeCabDictionary[] Dictionaries { get; set; }
public TranscriptionProvider(MeCabDictionary[]? dictionaries = null)
{
if (dictionaries == null)
{
var param = new MeCabParam();
param.LoadDicRC();
var dictionary = new MeCabDictionary();
dictionary.Open(Path.Combine(param.DicDir, "sys.dic"));
dictionaries = [dictionary];
}
Dictionaries = dictionaries;
}
public static TranscriptionEnumerator CreateTranscriptionEnumerator(IsValidPhraseDelegate isValidPhrase, GetAllTranscriptionsDelegate getAllTranscriptions) => codePoints =>
{
var resultMap = new Dictionary<(int Start, int Length), Transcription>();
for (int phraseLength = 1; phraseLength <= codePoints.Length; phraseLength++) for (int start = 0; start + phraseLength <= codePoints.Length; start++)
{
if (!isValidPhrase(codePoints, start, phraseLength)) continue;
var phrase = MemoryMarshal.ToEnumerable(codePoints.Slice(start, phraseLength)).ToUtf32String();
var atomicTranscriptions = getAllTranscriptions(phrase).Where(transcription => transcription != null).Where(candidateTranscription =>
{
if (candidateTranscription.Length == 0) return false;
// Ensure the transcription is atomic (i.e. not a concatenation of shorter transcriptions split at any midpoint)
var visitedStates = new HashSet<(int PhrasePosition, int TranscriptionPosition)>();
var queue = new Queue<(int PhrasePosition, int TranscriptionPosition)>();
queue.Enqueue((0, 0));
while (queue.Count > 0)
{
var (phrasePosition, transcriptionPosition) = queue.Dequeue();
for (int prefixLength = 1; prefixLength <= phraseLength - phrasePosition; prefixLength++)
{
if (!resultMap.TryGetValue((start + phrasePosition, prefixLength), out var prefixResult)) continue;
foreach (var transcription in prefixResult.Transcriptions) if (string.Compare(candidateTranscription, transcriptionPosition, transcription, 0, transcription.Length) == 0)
{
var nextState = (PhrasePosition: phrasePosition + prefixLength, TranscriptionPosition: transcriptionPosition + transcription.Length);
if (nextState.PhrasePosition == phraseLength && nextState.TranscriptionPosition == candidateTranscription.Length) return false; // Found a valid combination
if (visitedStates.Contains(nextState)) continue;
visitedStates.Add(nextState);
queue.Enqueue(nextState);
}
}
}
return true;
}).ToArray();
if (atomicTranscriptions.Length > 0) resultMap[(start, phraseLength)] = new() { Start = start, Length = phraseLength, Transcriptions = atomicTranscriptions };
}
return resultMap.Values;
};
public HashSet<string> GetAllKanaReadings(string phrase)
{
var result = new HashSet<string>();
var isKana = phrase.All(ch => JapaneseUtils.IsKana(ch));
if (isKana) result.Add(CommonNormalization.ToKatakana(phrase));
if (isKana && phrase.Length == 1) return result;
foreach (var dictionary in Dictionaries)
{
var searchResult = dictionary.ExactMatchSearch(phrase);
if (searchResult.Value == -1) continue;
var tokens = dictionary.GetToken(searchResult);
foreach (var token in tokens)
{
var feature = dictionary.GetFeature(token.Feature);
var parts = feature.Split(',');
if (parts.Length > 7) result.Add(CommonNormalization.ToKatakana(parts[7]));
}
}
return result;
}
public HashSet<string> GetAllKanaReadingsWithNormalization(string phrase) =>
GetAllKanaReadings(JapaneseUtils.StripJapaneseSoundMarks(JapaneseNormalization.NormalizeKanaDakuten(phrase)));
public TranscriptionEnumerator EnumerateKanaTranscriptions => CreateTranscriptionEnumerator(
JapaneseUtils.IsValidJapanesePhrase,
GetAllKanaReadingsWithNormalization);
public TranscriptionEnumerator EnumerateRomajiTranscriptions => CreateTranscriptionEnumerator(
JapaneseUtils.IsValidJapanesePhrase,
phrase => [.. GetAllKanaReadingsWithNormalization(phrase).Select(kana => JapaneseNormalization.NormalizeRomaji(JapaneseUtils.ToRomajiStrictly(kana)))]);
}
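A usage sketch for the transcription enumerator (illustrative; the actual readings depend on the MeCab dictionary in use, and the Common.Extensions namespace is assumed to be imported):

var provider = new TranscriptionProvider(); // opens sys.dic via mecabrc
ReadOnlyMemory<int> codePoints = "日本語".ToCodePoints().ToArray();

foreach (var span in provider.EnumerateKanaTranscriptions(codePoints))
{
    // Each entry covers codePoints[span.Start .. span.Start + span.Length)
    // and lists the atomic katakana readings found for that sub-phrase.
    Console.WriteLine($"[{span.Start}, {span.Start + span.Length}) -> {string.Join("/", span.Transcriptions)}");
}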
@@ -0,0 +1,29 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>netstandard2.0</TargetFramework>
<OutputType>Library</OutputType>
<RootNamespace>$(ProjectName).Indexer</RootNamespace>
<AssemblyName>$(RootNamespace)</AssemblyName>
</PropertyGroup>
<PropertyGroup>
<IsPackable>true</IsPackable>
<PackageId>$(RootNamespace)</PackageId>
<!-- Don't include MeCab dictionaries in this package; let MeCab.DotNet provide them to end users -->
<MeCabUseDefaultDictionary>False</MeCabUseDefaultDictionary>
</PropertyGroup>
<ItemGroup>
<ProjectReference Include="..\MaigoLabs.NeedLe.Common\MaigoLabs.NeedLe.Common.csproj" />
</ItemGroup>
<ItemGroup>
<PackageReference Include="DotNetCampus.LatestCSharpFeatures" PrivateAssets="all" />
<PackageReference Include="hyjiacan.pinyin4net" />
<PackageReference Include="MeCab.DotNet" PrivateAssets="analyzers" />
<PackageReference Include="MyNihongo.KanaConverter" />
<PackageReference Include="OpenccNetLib" PrivateAssets="analyzers" />
</ItemGroup>
</Project>
@@ -0,0 +1,104 @@
using MaigoLabs.NeedLe.Common;
using MaigoLabs.NeedLe.Common.Extensions;
using MaigoLabs.NeedLe.Common.Types;
using MaigoLabs.NeedLe.Indexer.Han;
using MaigoLabs.NeedLe.Indexer.Japanese;
namespace MaigoLabs.NeedLe.Indexer;
public class TokenizerOptions
{
public HanVariantProvider? HanVariantProvider { get; set; }
public TranscriptionProvider? TranscriptionProvider { get; set; }
}
public class Tokenizer(TokenizerOptions? options = null)
{
public HanVariantProvider HanVariantProvider { get; set; } = options?.HanVariantProvider ?? new HanVariantProvider();
public TranscriptionProvider TranscriptionProvider { get; set; } = options?.TranscriptionProvider ?? new TranscriptionProvider();
public class Token
{
public required int Id { get; set; }
public required int Start { get; set; }
public required int End { get; set; }
}
public Dictionary<(TokenType Type, string Text), TokenDefinition> Tokens { get; } = [];
private TokenDefinition EnsureToken(TokenType type, string text)
{
var key = (type, text);
if (Tokens.TryGetValue(key, out var tokenDefinition)) return tokenDefinition;
tokenDefinition = new TokenDefinition { Id = Tokens.Count, Type = type, Text = text, CodePointLength = text.ToCodePoints().Count() };
Tokens.Add(key, tokenDefinition);
return tokenDefinition;
}
public List<Token> Tokenize(string text)
{
var codePoints = text.ToCodePoints().Select(CommonNormalization.NormalizeCodePoint).ToArray();
var results = new List<Token>();
Action<TokenType /* tokenType */, string /* text */> Emitter(int start, int end) =>
(tokenType, tokenText) => results.Add(new Token { Id = EnsureToken(tokenType, tokenText).Id, Start = start, End = end });
void EmitMaybeJapanese(ReadOnlyMemory<int> codePoints, int offset)
{
foreach (var combination in TranscriptionProvider.EnumerateKanaTranscriptions(codePoints))
{
var emit = Emitter(offset + combination.Start, offset + combination.Start + combination.Length);
foreach (var transcription in combination.Transcriptions) emit(TokenType.Kana, transcription);
}
foreach (var combination in TranscriptionProvider.EnumerateRomajiTranscriptions(codePoints))
{
var emit = Emitter(offset + combination.Start, offset + combination.Start + combination.Length);
foreach (var transcription in combination.Transcriptions) emit(TokenType.Romaji, transcription);
}
for (int i = 0; i < codePoints.Length; i++)
{
// A single character may have not only kana readings but also Chinese pronunciations and Simplified/Traditional/Japanese variants.
var hanAlternates = HanVariantProvider.GetHanVariants(codePoints.Span[i]); // All possible variant characters (Simplified/Traditional/Japanese)
var pinyinAlternates = hanAlternates.SelectMany(PinyinHelper.GetPinyinCandidates).Distinct();
var emit = Emitter(offset + i, offset + i + 1);
foreach (var han in hanAlternates) emit(TokenType.Han, char.ConvertFromUtf32(han));
foreach (var pinyin in pinyinAlternates) emit(TokenType.Pinyin, pinyin);
}
}
var consecutiveCharsets = new (Func<int, bool> Is, Action<ReadOnlyMemory<int>, int> Emit)[]
{
(Is: JapaneseUtils.IsMaybeJapanese, Emit: EmitMaybeJapanese),
};
void EmitRaw(int codePoint, int offset) => Emitter(offset, offset + 1)(TokenType.Raw, char.ConvertFromUtf32(codePoint));
for (int start = 0; start < codePoints.Length; )
{
var codePoint = codePoints[start];
var emitted = false;
foreach (var (Is, Emit) in consecutiveCharsets)
{
var length = 0;
while (start + length < codePoints.Length && Is(codePoints[start + length])) length++;
if (length > 0)
{
Emit(new Memory<int>(codePoints, start, length), start);
start += length;
emitted = true;
break;
}
}
if (emitted) continue;
// Skip whitespaces
if (CommonUtils.IsWhitespace(codePoint))
{
start++;
continue;
}
EmitRaw(codePoint, start);
start++;
}
return results;
}
}
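A tokenization sketch (illustrative; like the indexer, the default providers need MeCab and OpenCC data available):

var tokenizer = new Tokenizer();

// Japanese/Han characters expand into Kana/Romaji/Han/Pinyin tokens;
// everything else (minus whitespace) is emitted as Raw, one code point each.
List<Tokenizer.Token> tokens = tokenizer.Tokenize("鯖とsaba");
foreach (var token in tokens)
{
    var definition = tokenizer.Tokens.Values.First(d => d.Id == token.Id);
    Console.WriteLine($"{definition.Type} \"{definition.Text}\" [{token.Start}, {token.End})");
}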
@@ -0,0 +1,93 @@
using MaigoLabs.NeedLe.Common;
namespace MaigoLabs.NeedLe.Indexer.Trie;
public static class TrieBuilder
{
private static TrieNode NewNode(TrieNode? parent) => new() { Parent = parent, Children = [], TokenIds = [], SubTreeTokenIds = [] };
public static TrieNode BuildTrie(IEnumerable<(int Id, IEnumerable<int> CodePoints)> tokens)
{
var root = NewNode(null);
foreach (var (id, codePoints) in tokens)
{
var node = root;
foreach (var codePoint in codePoints)
{
node.Children.TryGetValue(codePoint, out var childNode);
if (childNode == null) node.Children[codePoint] = childNode = NewNode(node);
node = childNode;
node.SubTreeTokenIds.Add(id);
}
node.TokenIds.Add(id);
}
return root;
}
public static void GraftTriePaths(TrieNode root, IEnumerable<(int[] From, int[] To)> rules)
{
foreach (var (inputPhrase, graftTo) in rules) if (graftTo.Length > inputPhrase.Length) throw new ArgumentException($"Graft rule {inputPhrase} -> {graftTo} maps to longer string and may cause infinite loop");
var visitedNodes = new HashSet<TrieNode>();
void GraftFromNode(TrieNode node, bool recursiveChildren)
{
if (!visitedNodes.Add(node)) return;
if (recursiveChildren) foreach (var child in node.Children.Values) GraftFromNode(child, true);
while (true)
{
var nodesWithNewGraftedChildren = new Dictionary<TrieNode, /* depth from initial node */ int>();
foreach (var (inputPhrase, graftTo) in rules)
{
var targetNode = node.Traverse(graftTo);
if (targetNode == null) continue;
var graftedPath = new TrieNode[inputPhrase.Length - 1];
var isGrafted = false;
var currentNode = node;
for (var i = 0; i < inputPhrase.Length; i++)
{
var codePoint = inputPhrase[i];
currentNode.Children.TryGetValue(codePoint, out var childNode);
if (i == inputPhrase.Length - 1)
{
if (childNode != null)
{
if (childNode != targetNode) throw new ArgumentException($"Grafted path {inputPhrase} conflicts with existing path");
// Already grafted
}
else
{
currentNode.Children[codePoint] = childNode = targetNode;
isGrafted = true;
}
}
else
{
if (childNode == null)
{
childNode = NewNode(currentNode);
childNode.SubTreeTokenIds = targetNode.SubTreeTokenIds;
currentNode.Children[codePoint] = childNode;
}
else
{
// Part of another grafted path?
childNode.SubTreeTokenIds = new HashSet<int>(childNode.SubTreeTokenIds.Concat(targetNode.SubTreeTokenIds)).ToList();
}
graftedPath[i] = currentNode = childNode;
}
}
if (isGrafted) for (var i = 0; i < graftedPath.Length; i++) nodesWithNewGraftedChildren[graftedPath[i]!] = i + 1;
}
if (nodesWithNewGraftedChildren.Count > 0)
{
// Re-check graft rules on the newly grafted path
// 1. No need to recurse into other children (not on this path) since their children are not affected
// 2. No need to consider ancestors of this node since they're handled later (we run in DFS order)
var sortedNodes = nodesWithNewGraftedChildren.OrderByDescending(x => x.Value);
foreach (var (changedNode, _) in sortedNodes) GraftFromNode(changedNode, false);
}
else break; // No new grafts applied
}
}
GraftFromNode(root, true);
}
}
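A rough sketch of building and grafting a trie (illustrative; assumes the Common, Indexer.Trie and Indexer.Japanese namespaces are imported):

// Token 0 = "saba", token 1 = "sya".
var root = TrieBuilder.BuildTrie(
[
    (0, "saba".Select(c => (int)c)),
    (1, "sya".Select(c => (int)c)),
]);

// Grafting the romaji rules lets equivalent spellings reach the same nodes,
// so after "sha" -> "sya" is applied, "sha" should resolve to token 1.
TrieBuilder.GraftTriePaths(root, JapaneseNormalization.NORMALIZE_RULES_ROMAJI_CODEPOINTS);
var node = root.Traverse([.. "sha".Select(c => (int)c)]);
bool matches = node.IsTokenExactMatch(1);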
@@ -0,0 +1,41 @@
using MaigoLabs.NeedLe.Common;
namespace MaigoLabs.NeedLe.Indexer.Trie;
public static class TrieSerializer
{
private class NodeEntry
{
public int Id { get; set; }
public bool Visited { get; set; }
public int[]? Data { get; set; }
}
public static int[] Serialize(TrieNode root)
{
var nodeEntries = new Dictionary<TrieNode, NodeEntry>();
var currentId = 0;
NodeEntry GetNodeEntry(TrieNode node) => nodeEntries.TryGetValue(node, out var nodeEntry) ? nodeEntry :
nodeEntries[node] = new NodeEntry { Id = ++currentId, Visited = false, Data = null };
int SerializeNode(TrieNode node)
{
var entry = GetNodeEntry(node);
if (entry.Visited) return entry.Id;
entry.Visited = true;
var children = node.Children.Select(child => (CodePoint: child.Key, ChildId: SerializeNode(child.Value))).ToArray();
entry.Data =
[
node.Parent != null ? GetNodeEntry(node.Parent).Id : 0,
.. children.Select(child => child.CodePoint),
.. children.Select(child => child.ChildId),
// End of children list (<= 0 are not valid code points nor node IDs)
.. node.TokenIds.Count > 0
? node.TokenIds.Select(tokenId => -(tokenId + 1)) // Use the negative value of (tokenId + 1)
: [0], // End of children list, no token IDs (token IDs are encoded to negative values)
];
return entry.Id;
}
SerializeNode(root);
return nodeEntries.Values.OrderBy(entry => entry.Id).SelectMany(entry => entry.Data ?? []).ToArray();
}
}
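A sketch of the flat layout produced above (illustrative): each node contributes [parentId, child code points..., child node ids..., token ids encoded as -(tokenId + 1), or a single 0 when the node has no tokens], so positive values are code points or node ids and values <= 0 terminate the children list. Deserialization happens on the searcher side (see TrieDeserializer used by InvertedIndexLoader below).

// A two-character token "no" with id 0 yields three nodes:
// the root, the 'n' node, and the 'o' node carrying token id 0 (encoded as -1).
var root = TrieBuilder.BuildTrie([(0, "no".Select(c => (int)c))]);
int[] serialized = TrieSerializer.Serialize(root);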
@@ -0,0 +1,18 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<OutputType>Exe</OutputType>
<RootNamespace>$(ProjectName).Playground</RootNamespace>
<AssemblyName>$(RootNamespace)</AssemblyName>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Telegram.Bot" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\MaigoLabs.NeedLe\MaigoLabs.NeedLe.csproj" />
</ItemGroup>
</Project>
@@ -0,0 +1,162 @@
using System.Diagnostics;
using System.Text.Encodings.Web;
using System.Text.Json;
using MaigoLabs.NeedLe.Common.Extensions;
using MaigoLabs.NeedLe.Indexer;
using MaigoLabs.NeedLe.Searcher;
using Telegram.Bot;
using Telegram.Bot.Polling;
using Telegram.Bot.Types;
using Telegram.Bot.Types.Enums;
namespace MaigoLabs.NeedLe.Playground;
public class Program
{
private static LoadedInvertedIndex _invertedIndex = null!;
private static long _targetChatId;
public static async Task Main(string[] args)
{
var botToken = Environment.GetEnvironmentVariable("TELEGRAM_BOT_TOKEN")
?? throw new InvalidOperationException("Missing environment variable TELEGRAM_BOT_TOKEN");
var targetChatIdStr = Environment.GetEnvironmentVariable("TARGET_CHAT_ID")
?? throw new InvalidOperationException("Missing environment variable TARGET_CHAT_ID");
_targetChatId = long.Parse(targetChatIdStr);
// Build inverted index
var exampleDocuments = File.ReadAllLines("../../example.txt").Where(line => line.Length > 0).ToArray();
var startBuild = Stopwatch.GetTimestamp();
var compressed = InvertedIndexBuilder.BuildInvertedIndex(exampleDocuments);
var endBuild = Stopwatch.GetTimestamp();
Console.WriteLine($"Built inverted index in {Stopwatch.GetElapsedTime(startBuild, endBuild).TotalMilliseconds}ms");
var startLoad = Stopwatch.GetTimestamp();
_invertedIndex = InvertedIndexLoader.Load(compressed);
var endLoad = Stopwatch.GetTimestamp();
Console.WriteLine($"Loaded inverted index in {Stopwatch.GetElapsedTime(startLoad, endLoad).TotalMilliseconds}ms");
// Start bot
var bot = new TelegramBotClient(botToken);
var me = await bot.GetMe();
Console.WriteLine($"Bot logged in as {me.FirstName} (@{me.Username})");
using var cts = new CancellationTokenSource();
Console.CancelKeyPress += (_, e) => { e.Cancel = true; cts.Cancel(); };
bot.StartReceiving(
updateHandler: HandleUpdateAsync,
errorHandler: HandleErrorAsync,
receiverOptions: new ReceiverOptions { AllowedUpdates = [UpdateType.Message] },
cancellationToken: cts.Token
);
await Task.Delay(-1, cts.Token).ContinueWith(_ => { });
}
private static async Task HandleUpdateAsync(ITelegramBotClient bot, Update update, CancellationToken ct)
{
if (update.Message is not { Text: { } text, Chat.Id: var chatId, From: { } from }) return;
Console.WriteLine($"{chatId}:{from.Id} {JsonSerializer.Serialize(text, JsonSerializerOptions)}");
if (chatId != _targetChatId) return;
if (text.StartsWith("/needle "))
{
var query = text["/needle ".Length..];
var response = HandleNeedleCommand(query);
await bot.SendMessage(chatId, response, parseMode: ParseMode.Html, cancellationToken: ct);
}
else if (text.StartsWith("/tokenize "))
{
var query = text["/tokenize ".Length..];
var response = HandleTokenizeCommand(query);
await bot.SendMessage(chatId, response, parseMode: ParseMode.Html, cancellationToken: ct);
}
}
private static Task HandleErrorAsync(ITelegramBotClient bot, Exception exception, HandleErrorSource source, CancellationToken ct)
{
Console.WriteLine($"Error: {exception.Message}");
return Task.CompletedTask;
}
private static string HandleNeedleCommand(string query)
{
var startSearch = Stopwatch.GetTimestamp();
var results = InvertedIndexSearcher.Search(_invertedIndex, query);
var endSearch = Stopwatch.GetTimestamp();
var searchDuration = Stopwatch.GetElapsedTime(startSearch, endSearch).TotalMilliseconds.ToString("F3");
if (results.Length == 0)
return Codify($"No results found after {searchDuration}ms");
var showingResults = results.Take(5).ToArray();
return string.Join('\n',
[
Codify($"Search completed in {searchDuration}ms, showing {showingResults.Length}/{results.Length} results:\n"),
.. showingResults.Select(result => InspectSearchResult(result, true))
]).TrimEnd();
}
private static string HandleTokenizeCommand(string query)
{
var tokenizer = new Tokenizer();
var startTokenize = Stopwatch.GetTimestamp();
var tokens = tokenizer.Tokenize(query);
var tokenDefinitions = tokenizer.Tokens.Values.ToArray();
var endTokenize = Stopwatch.GetTimestamp();
var tokenizeDuration = Stopwatch.GetElapsedTime(startTokenize, endTokenize).TotalMilliseconds.ToString("F3");
if (tokens.Count == 0) return Codify($"No tokens emitted after {tokenizeDuration}ms");
var codePoints = query.ToCodePoints().ToArray();
var lines = new List<string>
{
$"Tokenization completed in {tokenizeDuration}ms, emitted {tokens.Count} tokens:"
};
foreach (var token in tokens)
{
var tokenDef = tokenDefinitions[token.Id];
var originalPhrase = codePoints.Skip(token.Start).Take(token.End - token.Start).ToUtf32String();
lines.Add($" {tokenDef.Type}: {JsonSerializer.Serialize(tokenDef.Text, JsonSerializerOptions)} <- {JsonSerializer.Serialize(originalPhrase, JsonSerializerOptions)} [{token.Start}, {token.End}]");
}
return Codify(string.Join('\n', lines));
}
private static string InspectSearchResult(SearchResult result, bool htmlHighlight)
{
var documentText = result.DocumentText;
var documentCodePoints = result.DocumentCodePoints;
var tokens = result.Tokens;
var rangeCount = result.RangeCount;
var matchRatio = result.MatchRatio;
var matchRatioLevel = result.MatchRatioLevel;
var resultText = htmlHighlight
? string.Join("", SearchResultHighlighter.Highlight(result).Select(part => !part.IsHighlighted ? EscapeHtml(part.Text) : $"<u><b>{EscapeHtml(part.Text)}</b></u>"))
: documentText;
var description = $" ({rangeCount} ranges, {Math.Round(matchRatio * 10000) / 10000} => L{matchRatioLevel})";
return string.Join('\n',
[
resultText + (htmlHighlight ? $"<code>{description}</code>" : description),
.. tokens.Select(token =>
{
var escapedTokenText = JsonSerializer.Serialize(token.Definition.Text, JsonSerializerOptions);
var escapedDocumentText = JsonSerializer.Serialize(documentCodePoints.Skip(token.DocumentOffset.Start).Take(token.DocumentOffset.Length).ToUtf32String(), JsonSerializerOptions);
if (htmlHighlight)
{
escapedTokenText = EscapeHtml(escapedTokenText);
escapedDocumentText = EscapeHtml(escapedDocumentText);
}
var line = $" {token.Definition.Type}: {escapedTokenText} -> {escapedDocumentText}" + (token.IsTokenPrefixMatching ? " (prefix match)" : "");
return htmlHighlight ? $"<code>{line}</code>" : line;
}),
"",
]);
}
private static string Codify(string text) => $"<code>{EscapeHtml(text)}</code>";
private static JsonSerializerOptions JsonSerializerOptions => new() { Encoder = JavaScriptEncoder.UnsafeRelaxedJsonEscaping };
private static string EscapeHtml(string text) => text.Replace("&", "&amp;").Replace("<", "&lt;").Replace(">", "&gt;");
}
@@ -0,0 +1,72 @@
using MaigoLabs.NeedLe.Common;
using MaigoLabs.NeedLe.Common.Extensions;
using MaigoLabs.NeedLe.Common.Types;
using MaigoLabs.NeedLe.Searcher.Trie;
namespace MaigoLabs.NeedLe.Searcher;
public class LoadedInvertedIndex
{
public class TokenDocumentReference
{
public required int DocumentId { get; set; }
public required OffsetSpan[] Offsets { get; set; }
}
public class TokenDefinitionExtended : TokenDefinition
{
public required TokenDocumentReference[] References { get; set; }
}
public class TypedTries
{
public required TrieNode Romaji { get; set; }
public required TrieNode Kana { get; set; }
public required TrieNode Other { get; set; }
}
public required string[] Documents { get; set; }
public required int[][] DocumentCodePoints { get; set; }
public required TokenDefinitionExtended[] TokenDefinitions { get; set; }
public required TypedTries Tries { get; set; }
}
public class InvertedIndexLoader
{
public static LoadedInvertedIndex Load(CompressedInvertedIndex compressed)
{
var documents = compressed.documents;
var documentCodePoints = documents.Select(document => document.ToCodePoints().ToArray()).ToArray();
var romajiTrie = TrieDeserializer.Deserialize(compressed.tries.romaji);
var kanaTrie = TrieDeserializer.Deserialize(compressed.tries.kana);
var otherTrie = TrieDeserializer.Deserialize(compressed.tries.other);
var tokenCodePoints = romajiTrie.TokenCodePoints.Concat(kanaTrie.TokenCodePoints).Concat(otherTrie.TokenCodePoints)
.ToDictionary(entry => entry.Key, entry => entry.Value);
var tokenDefinitions = compressed.tokenTypes.Select((type, index) => new LoadedInvertedIndex.TokenDefinitionExtended
{
Id = index, Type = (TokenType)type, Text = tokenCodePoints[index].ToUtf32String(),
CodePointLength = tokenCodePoints[index].Length,
References = compressed.tokenReferences[index].Select(data => new LoadedInvertedIndex.TokenDocumentReference
{
DocumentId = data[0],
Offsets = Enumerable.Range(0, data.Length / 2)
.Select(i => new OffsetSpan { Start = data[i * 2 + 1], End = data[i * 2 + 2] }).ToArray(),
}).ToArray(),
}).ToArray();
return new LoadedInvertedIndex
{
Documents = documents,
DocumentCodePoints = documentCodePoints,
TokenDefinitions = tokenDefinitions,
Tries = new LoadedInvertedIndex.TypedTries
{
Romaji = romajiTrie.Root,
Kana = kanaTrie.Root,
Other = otherTrie.Root,
},
};
}
}
@@ -0,0 +1,270 @@
using MaigoLabs.NeedLe.Common;
using MaigoLabs.NeedLe.Common.Extensions;
using MaigoLabs.NeedLe.Common.Types;
namespace MaigoLabs.NeedLe.Searcher;
public class SearchResultToken
{
public required TokenDefinition Definition { get; set; }
public required OffsetSpan DocumentOffset { get; set; }
public required OffsetSpan InputOffset { get; set; }
public required bool IsTokenPrefixMatching { get; set; }
}
public class SearchResult
{
public required int DocumentId { get; set; }
public required string DocumentText { get; set; }
public required int[] DocumentCodePoints { get; set; }
public required SearchResultToken[] Tokens { get; set; }
public required int PrefixMatchCount { get; set; }
public required int RangeCount { get; set; }
public required double MatchRatio { get; set; }
public required int MatchRatioLevel { get; set; }
}
public static class InvertedIndexSearcher
{
public abstract class ComparableStateBase<T> : IComparable<T>
where T : ComparableStateBase<T>
{
protected abstract int GetRangeCount();
protected abstract int GetPrefixMatchCount();
protected abstract OffsetSpan GetFirstTokenDocumentOffset();
protected abstract OffsetSpan GetLastTokenDocumentOffset();
protected virtual SearchResultToken? GetLastToken() => null; // Not on intermediate results
protected virtual int? GetMatchRatioLevel() => null; // Not on intermediate/candidate results
protected abstract double GetMatchRatio();
protected virtual int FallbackCompareTo(T other) => 0; // Called when all other comparisons are equal
public int CompareTo(T other)
{
// Prefer matches that do not rely on end-of-input loose matching (full match over prefix match)
SearchResultToken? aLastToken = GetLastToken(), bLastToken = other.GetLastToken();
if (aLastToken != null && bLastToken != null)
{
var aDidPrefixMatchByTokenType = aLastToken.IsTokenPrefixMatching && tokenTypePrefixMatchingPolicy[aLastToken.Definition.Type] == TokenTypePrefixMatchingPolicy.AllowOnlyAtInputEnd;
var bDidPrefixMatchByTokenType = bLastToken.IsTokenPrefixMatching && tokenTypePrefixMatchingPolicy[bLastToken.Definition.Type] == TokenTypePrefixMatchingPolicy.AllowOnlyAtInputEnd;
if (aDidPrefixMatchByTokenType != bDidPrefixMatchByTokenType) return aDidPrefixMatchByTokenType ? 1 : -1;
}
// Prefer results that matched fewer discontinuous ranges
int aRangeCount = GetRangeCount(), bRangeCount = other.GetRangeCount();
if (aRangeCount != bRangeCount) return aRangeCount - bRangeCount;
// Prefer results that match the first token earlier in the document
OffsetSpan aFirstTokenDocumentOffset = GetFirstTokenDocumentOffset(), bFirstTokenDocumentOffset = other.GetFirstTokenDocumentOffset();
if (aFirstTokenDocumentOffset.Start != bFirstTokenDocumentOffset.Start) return aFirstTokenDocumentOffset.Start - bFirstTokenDocumentOffset.Start;
// Prefer results with a higher match ratio (but don't distinguish similar ratios, hence `matchRatioLevel`)
int? aMatchRatioLevel = GetMatchRatioLevel(), bMatchRatioLevel = other.GetMatchRatioLevel();
if (aMatchRatioLevel != null && bMatchRatioLevel != null)
{
if (aMatchRatioLevel.Value != bMatchRatioLevel.Value) return bMatchRatioLevel.Value - aMatchRatioLevel.Value;
}
// Prefer results whose last token occurred earlier in the document (and, if tied, ended earlier)
OffsetSpan aLastTokenDocumentOffset = GetLastTokenDocumentOffset(), bLastTokenDocumentOffset = other.GetLastTokenDocumentOffset();
if (aLastTokenDocumentOffset.Start != bLastTokenDocumentOffset.Start) return aLastTokenDocumentOffset.Start - bLastTokenDocumentOffset.Start;
if (aLastTokenDocumentOffset.End != bLastTokenDocumentOffset.End) return aLastTokenDocumentOffset.End - bLastTokenDocumentOffset.End;
// Prefer results with a higher match ratio (compared precisely)
double aMatchRatio = GetMatchRatio(), bMatchRatio = other.GetMatchRatio();
if (aMatchRatio != bMatchRatio) return bMatchRatio < aMatchRatio ? -1 : bMatchRatio > aMatchRatio ? 1 : 0;
return FallbackCompareTo(other);
}
}
public class IntermediateResult : ComparableStateBase<IntermediateResult>
{
public required IntermediateResult? PreviousState { get; init; }
public required OffsetSpan FirstTokenDocumentOffset { get; init; }
public required int RangeCount { get; init; }
public required int TokenCount { get; init; }
public required int PrefixMatchCount { get; init; }
public required double MatchedTokenLength { get; init; }
public required int TokenId { get; init; }
public required OffsetSpan DocumentOffset { get; init; }
public required OffsetSpan InputOffset { get; init; }
public required bool IsTokenPrefixMatching { get; init; }
protected override int GetRangeCount() => RangeCount;
protected override int GetPrefixMatchCount() => PrefixMatchCount;
protected override OffsetSpan GetFirstTokenDocumentOffset() => FirstTokenDocumentOffset;
protected override OffsetSpan GetLastTokenDocumentOffset() => DocumentOffset;
protected override double GetMatchRatio() => MatchedTokenLength; // No need to divide by document length since intermediate results are for the same document
}
public class CandidateResult : ComparableStateBase<CandidateResult>
{
public required SearchResultToken[] Tokens { get; init; }
public required int PrefixMatchCount { get; init; }
public required double MatchedTokenLength { get; init; }
public required int RangeCount { get; init; }
protected override int GetRangeCount() => RangeCount;
protected override int GetPrefixMatchCount() => PrefixMatchCount;
protected override OffsetSpan GetFirstTokenDocumentOffset() => Tokens[0].DocumentOffset;
protected override OffsetSpan GetLastTokenDocumentOffset() => Tokens[^1].DocumentOffset;
protected override SearchResultToken? GetLastToken() => Tokens[^1];
protected override double GetMatchRatio() => MatchedTokenLength; // No need to divide by document length since candidate results are for the same document
}
public class FinalResult : ComparableStateBase<FinalResult>
{
public required SearchResult Result { get; init; }
protected override int GetRangeCount() => Result.RangeCount;
protected override int GetPrefixMatchCount() => Result.PrefixMatchCount;
protected override OffsetSpan GetFirstTokenDocumentOffset() => Result.Tokens[0].DocumentOffset;
protected override OffsetSpan GetLastTokenDocumentOffset() => Result.Tokens[^1].DocumentOffset;
protected override SearchResultToken? GetLastToken() => Result.Tokens[^1];
protected override double GetMatchRatio() => Result.MatchRatio;
protected override int? GetMatchRatioLevel() => Result.MatchRatioLevel;
protected override int FallbackCompareTo(FinalResult other) => string.Compare(Result.DocumentText, other.Result.DocumentText, StringComparison.InvariantCulture);
}
private static bool IsIgnorableCodePoint(int codePoint) => CommonUtils.IsWhitespace(codePoint) || codePoint == 0x3099 || codePoint == 0x309A;
public enum TokenTypePrefixMatchingPolicy {
AlwaysAllow,
NeverAllow,
AllowOnlyAtInputEnd,
}
private static Dictionary<TokenType, TokenTypePrefixMatchingPolicy> tokenTypePrefixMatchingPolicy = new()
{
[TokenType.Romaji] = TokenTypePrefixMatchingPolicy.NeverAllow,
[TokenType.Kana] = TokenTypePrefixMatchingPolicy.AlwaysAllow,
// These token types are in an "other" Trie
[TokenType.Han] = TokenTypePrefixMatchingPolicy.AllowOnlyAtInputEnd, // No effect because always 1 code point
[TokenType.Pinyin] = TokenTypePrefixMatchingPolicy.AllowOnlyAtInputEnd,
[TokenType.Raw] = TokenTypePrefixMatchingPolicy.AllowOnlyAtInputEnd, // No effect because always 1 code point
};
private static bool ShouldAllowPrefixMatching(TokenType tokenType, bool isAtInputEnd) =>
tokenTypePrefixMatchingPolicy[tokenType] == TokenTypePrefixMatchingPolicy.AlwaysAllow ||
(tokenTypePrefixMatchingPolicy[tokenType] != TokenTypePrefixMatchingPolicy.NeverAllow && isAtInputEnd);
private static bool HasNonEmptyCharacters(int[] documentCodePoints, int start, int end) =>
start != end && !documentCodePoints.Skip(start).Take(end - start).All(CommonUtils.IsWhitespace);
public static SearchResult[] Search(LoadedInvertedIndex invertedIndex, string text)
{
var documents = invertedIndex.Documents;
var documentCodePoints = invertedIndex.DocumentCodePoints;
var tokenDefinitions = invertedIndex.TokenDefinitions;
var tries = invertedIndex.Tries;
var codePoints = text.ToCodePoints().Select(CommonNormalization.NormalizeCodePoint).Select(CommonNormalization.ToKatakana).ToArray();
// dp[i] = docId => end => IntermediateResult, starts from dp[-1] (l === 0), ends at dp[N - 1] (r === N - 1)
var dp = Enumerable.Range(0, codePoints.Length).Select(l => new Dictionary<int, Dictionary<int, IntermediateResult>>()).ToArray();
for (var l = 0; l < codePoints.Length; l++)
{
if (l != 0 && dp[l - 1].Count == 0) continue; // No documents match input from beginning to this position
var romajiNode = tries.Romaji;
var kanaNode = tries.Kana;
var otherNode = tries.Other;
for (var r = l; r < codePoints.Length && (romajiNode != null || kanaNode != null || otherNode != null); r++) // [l, r]
{
var codePoint = codePoints[r];
romajiNode = romajiNode.TraverseStep(codePoint, IsIgnorableCodePoint(codePoint));
kanaNode = kanaNode.TraverseStep(codePoint, IsIgnorableCodePoint(codePoint));
otherNode = otherNode.TraverseStep(codePoint, IsIgnorableCodePoint(codePoint));
var reachingInputEnd = r == codePoints.Length - 1;
HashSet<int> matchingTokenIds =
[
// Allow prefix matching of tokens at the end of the input, per the per-token-type policy
.. romajiNode.GetTokenIds(ShouldAllowPrefixMatching(TokenType.Romaji, reachingInputEnd)),
.. kanaNode.GetTokenIds(ShouldAllowPrefixMatching(TokenType.Kana, reachingInputEnd)),
.. otherNode.GetTokenIds(reachingInputEnd),
];
foreach (var tokenId in matchingTokenIds) foreach (var reference in tokenDefinitions[tokenId].References)
{
var isTokenPrefixMatching = !romajiNode.IsTokenExactMatch(tokenId) && !kanaNode.IsTokenExactMatch(tokenId) && !otherNode.IsTokenExactMatch(tokenId);
var previousMatchesOfDocument = l != 0 && dp[l - 1].TryGetValue(reference.DocumentId, out var previousMatches) ? previousMatches : null;
if (l != 0 && previousMatchesOfDocument == null) continue;
foreach (var documentOffset in reference.Offsets)
{
int currentStart = documentOffset.Start, currentEnd = documentOffset.End;
if (l == 0) ContributeNextMatchingState(null);
else foreach (var (previousEnd, previousMatch) in previousMatchesOfDocument!) if (currentStart >= previousEnd) ContributeNextMatchingState(previousMatch);
void ContributeNextMatchingState(IntermediateResult? previousState)
{
var nextMatchingMap = dp[r];
if (!nextMatchingMap.TryGetValue(reference.DocumentId, out var nextMatches)) nextMatches = nextMatchingMap[reference.DocumentId] = [];
var oldResult = nextMatches.TryGetValue(currentEnd, out var result) ? result : null;
var inputOffset = new OffsetSpan { Start = l, End = r + 1 };
var newResult = new IntermediateResult
{
PreviousState = previousState,
FirstTokenDocumentOffset = previousState?.FirstTokenDocumentOffset ?? documentOffset,
RangeCount = previousState == null ? 1 :
previousState.RangeCount + (HasNonEmptyCharacters(documentCodePoints[reference.DocumentId], previousState.DocumentOffset.End, currentStart) ? 1 : 0),
TokenCount = (previousState?.TokenCount ?? 0) + 1,
PrefixMatchCount = (previousState?.PrefixMatchCount ?? 0) + (isTokenPrefixMatching ? 1 : 0),
MatchedTokenLength = (previousState?.MatchedTokenLength ?? 0) + documentOffset.Length *
Math.Min(isTokenPrefixMatching ? (double)inputOffset.Length / tokenDefinitions[tokenId].CodePointLength : double.PositiveInfinity, 1),
TokenId = tokenId,
DocumentOffset = documentOffset,
InputOffset = inputOffset,
IsTokenPrefixMatching = isTokenPrefixMatching,
};
nextMatches[currentEnd] = oldResult == null || newResult.CompareTo(oldResult) < 0 ? newResult : oldResult;
}
}
}
}
}
// Build search results and sort documents
return dp[codePoints.Length - 1].Select(entry =>
{
var (documentId, matches) = entry;
var sortedMatches = matches.Values.Select(match =>
{
var tokens = new List<SearchResultToken>();
// Build token list from backtracking
var state = match;
while (state != null)
{
tokens.Add(new SearchResultToken
{
Definition = tokenDefinitions[state.TokenId],
DocumentOffset = state.DocumentOffset, InputOffset = state.InputOffset,
IsTokenPrefixMatching = state.IsTokenPrefixMatching,
});
state = state.PreviousState;
}
tokens.Reverse();
return new CandidateResult
{
Tokens = tokens.ToArray(),
PrefixMatchCount = match.PrefixMatchCount,
MatchedTokenLength = match.MatchedTokenLength,
RangeCount = match.RangeCount,
};
}).OrderBy(match => match);
var bestMatch = sortedMatches.First();
var documentText = documents[documentId];
var matchRatio = bestMatch.MatchedTokenLength / documentCodePoints[documentId].Length;
var matchRatioLevel = (int)Math.Round(matchRatio * 5);
return new FinalResult
{
Result = new SearchResult
{
DocumentId = documentId,
DocumentText = documentText,
DocumentCodePoints = documentCodePoints[documentId],
Tokens = bestMatch.Tokens,
PrefixMatchCount = bestMatch.PrefixMatchCount,
RangeCount = bestMatch.RangeCount,
MatchRatio = matchRatio,
MatchRatioLevel = matchRatioLevel,
}
};
}).OrderBy(result => result).Select(result => result.Result).ToArray();
}
}
@@ -0,0 +1,23 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>netstandard2.0</TargetFramework>
<OutputType>Library</OutputType>
<RootNamespace>$(ProjectName).Searcher</RootNamespace>
<AssemblyName>$(RootNamespace)</AssemblyName>
</PropertyGroup>
<PropertyGroup>
<IsPackable>true</IsPackable>
<PackageId>$(RootNamespace)</PackageId>
</PropertyGroup>
<ItemGroup>
<ProjectReference Include="..\MaigoLabs.NeedLe.Common\MaigoLabs.NeedLe.Common.csproj" />
</ItemGroup>
<ItemGroup>
<PackageReference Include="DotNetCampus.LatestCSharpFeatures" PrivateAssets="all" />
</ItemGroup>
</Project>
@@ -0,0 +1,37 @@
using MaigoLabs.NeedLe.Common.Extensions;
using MaigoLabs.NeedLe.Common.Types;
namespace MaigoLabs.NeedLe.Searcher;
public class HighlightedTextPart
{
public required string Text { get; init; }
public required bool IsHighlighted { get; init; }
}
public static class SearchResultHighlighter
{
public static List<HighlightedTextPart> Highlight(SearchResult resultDocument)
{
var result = new List<HighlightedTextPart>();
var previousHighlightEnd = 0;
foreach (var token in resultDocument.Tokens)
{
var notHighlightedText = resultDocument.DocumentCodePoints.Skip(previousHighlightEnd).Take(token.DocumentOffset.Start - previousHighlightEnd).ToUtf32String();
if (notHighlightedText.Length > 0) result.Add(new HighlightedTextPart { Text = notHighlightedText, IsHighlighted = false });
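// For a prefix-matched kana token, highlight only the portion of the token covered by the query (at least one code point); otherwise highlight the whole matched token.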
var highlightEnd = token.IsTokenPrefixMatching && token.Definition.Type == TokenType.Kana
? token.DocumentOffset.Start + Math.Max(
1,
(int)Math.Round(
token.DocumentOffset.Length *
Math.Min(1, (double)token.InputOffset.Length / token.Definition.CodePointLength)
)
)
: token.DocumentOffset.End;
result.Add(new HighlightedTextPart { Text = resultDocument.DocumentCodePoints.Skip(token.DocumentOffset.Start).Take(highlightEnd - token.DocumentOffset.Start).ToUtf32String(), IsHighlighted = true });
previousHighlightEnd = highlightEnd;
}
if (previousHighlightEnd < resultDocument.DocumentCodePoints.Length) result.Add(new HighlightedTextPart { Text = resultDocument.DocumentCodePoints.Skip(previousHighlightEnd).ToUtf32String(), IsHighlighted = false });
return result;
}
}
@@ -0,0 +1,73 @@
using MaigoLabs.NeedLe.Common;
namespace MaigoLabs.NeedLe.Searcher.Trie;
public class DeserializedTrie
{
public required TrieNode Root { get; set; }
public required Dictionary<int, int[]> TokenCodePoints { get; set; }
}
public static class TrieDeserializer
{
public static DeserializedTrie Deserialize(int[] data)
{
var nodes = new List<TrieNode?>();
TrieNode GetNode(int id)
{
if (id > nodes.Count) nodes.AddRange(Enumerable.Repeat<TrieNode?>(null, id - nodes.Count));
return nodes[id - 1] ??= new TrieNode { Parent = null, Children = [], TokenIds = [], SubTreeTokenIds = [] };
}
var currentId = 0;
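// Serialized layout per node, in node ID order:
//   parentId (0 for the root), child code points..., child node IDs..., then token IDs encoded as -(tokenId + 1), or a single 0 if the node has none.
// Child entries form an even-length run of positive values; token IDs are strictly negative.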
for (var i = 0; i < data.Length; )
{
var node = GetNode(++currentId);
var parentId = data[i++];
node.Parent = parentId != 0 ? GetNode(parentId) : null;
var endOfChildren = i;
while (endOfChildren < data.Length && data[endOfChildren] > 0) endOfChildren++;
var numberOfChildren = (endOfChildren - i) / 2;
for (var j = i; j < i + numberOfChildren; j++)
{
var codePoint = data[j];
var child = GetNode(data[j + numberOfChildren]);
node.Children.Add(codePoint, child);
}
i = endOfChildren;
if (data[i] == 0) i++; // No token IDs
else while (i < data.Length && data[i] < 0) node.TokenIds.Add(-data[i++] - 1);
}
var root = nodes[0]!;
// DFS to construct code point paths for each token
var tokenCodePoints = new Dictionary<int, int[]>();
var currentCodePoints = new List<int>();
void DfsCodePoints(TrieNode node)
{
foreach (var tokenId in node.TokenIds) tokenCodePoints.Add(tokenId, [.. currentCodePoints]);
foreach (var (codePoint, child) in node.Children)
{
if (child.Parent != node) continue; // Skip grafted paths as these are not the canonical representation of the tokens
currentCodePoints.Add(codePoint);
DfsCodePoints(child);
currentCodePoints.RemoveAt(currentCodePoints.Count - 1);
}
}
DfsCodePoints(root);
// DFS to construct subTreeTokenIds for each node
var visitedNodes = new HashSet<TrieNode>();
List<int> DfsSubTreeTokenIds(TrieNode node)
{
if (visitedNodes.Contains(node)) return node.SubTreeTokenIds;
visitedNodes.Add(node);
node.SubTreeTokenIds = new HashSet<int>(node.TokenIds.Concat(node.Children.Values.SelectMany(DfsSubTreeTokenIds))).ToList();
return node.SubTreeTokenIds;
};
DfsSubTreeTokenIds(root);
return new DeserializedTrie { Root = root, TokenCodePoints = tokenCodePoints };
}
}
@@ -0,0 +1,126 @@
using MaigoLabs.NeedLe.Common;
namespace MaigoLabs.NeedLe.Tests.Common;
#region ToKatakana
public sealed class ToKatakana_ConvertsHiraganaToKatakanaTest : NeedleTestBase
{
[Fact]
public void Execute()
{
Assert.Equal("アイウエオ", CommonNormalization.ToKatakana("あいうえお"));
Assert.Equal("カキクケコ", CommonNormalization.ToKatakana("かきくけこ"));
Assert.Equal("サシスセソ", CommonNormalization.ToKatakana("さしすせそ"));
}
}
public sealed class ToKatakana_KeepsKatakanaUnchangedTest : NeedleTestBase
{
[Fact]
public void Execute()
{
Assert.Equal("アイウエオ", CommonNormalization.ToKatakana("アイウエオ"));
}
}
public sealed class ToKatakana_KeepsNonKanaUnchangedTest : NeedleTestBase
{
[Fact]
public void Execute()
{
Assert.Equal("abc123", CommonNormalization.ToKatakana("abc123"));
Assert.Equal("漢字", CommonNormalization.ToKatakana("漢字"));
}
}
public sealed class ToKatakana_HandlesMixedInputTest : NeedleTestBase
{
[Fact]
public void Execute()
{
Assert.Equal("アアa漢", CommonNormalization.ToKatakana("あアa漢"));
}
}
#endregion
#region NormalizeCodePoint
public sealed class NormalizeCodePoint_ConvertsFullwidthAsciiToHalfwidthLowercaseTest : NeedleTestBase
{
[Fact]
public void Execute()
{
Assert.Equal('a', CommonNormalization.NormalizeCodePoint('Ａ'));
Assert.Equal('b', CommonNormalization.NormalizeCodePoint('Ｂ'));
Assert.Equal('c', CommonNormalization.NormalizeCodePoint('Ｃ'));
Assert.Equal('1', CommonNormalization.NormalizeCodePoint('１'));
Assert.Equal('2', CommonNormalization.NormalizeCodePoint('２'));
Assert.Equal('3', CommonNormalization.NormalizeCodePoint('３'));
Assert.Equal('!', CommonNormalization.NormalizeCodePoint('！'));
}
}
public sealed class NormalizeCodePoint_ConvertsFullwidthSpaceToHalfwidthTest : NeedleTestBase
{
[Fact]
public void Execute()
{
Assert.Equal(' ', CommonNormalization.NormalizeCodePoint(' '));
}
}
public sealed class NormalizeCodePoint_ConvertsHalfwidthKanaToFullwidthTest : NeedleTestBase
{
[Fact]
public void Execute()
{
Assert.Equal('ア', CommonNormalization.NormalizeCodePoint('ア'));
Assert.Equal('イ', CommonNormalization.NormalizeCodePoint('イ'));
Assert.Equal('ウ', CommonNormalization.NormalizeCodePoint('ウ'));
Assert.Equal('エ', CommonNormalization.NormalizeCodePoint('エ'));
Assert.Equal('オ', CommonNormalization.NormalizeCodePoint('オ'));
Assert.Equal('カ', CommonNormalization.NormalizeCodePoint('カ'));
}
}
public sealed class NormalizeCodePoint_NormalizesVoicedSoundMarksTest : NeedleTestBase
{
[Fact]
public void Execute()
{
Assert.Equal(0x3099, CommonNormalization.NormalizeCodePoint('゙')); // halfwidth voiced -> combining
Assert.Equal(0x309A, CommonNormalization.NormalizeCodePoint('゚')); // halfwidth semi-voiced -> combining
Assert.Equal(0x3099, CommonNormalization.NormalizeCodePoint('゛')); // fullwidth voiced -> combining
Assert.Equal(0x309A, CommonNormalization.NormalizeCodePoint('゜')); // fullwidth semi-voiced -> combining
}
}
public sealed class NormalizeCodePoint_ConvertsHalfwidthPunctuationToFullwidthTest : NeedleTestBase
{
[Fact]
public void Execute()
{
Assert.Equal('。', CommonNormalization.NormalizeCodePoint('。'));
Assert.Equal('「', CommonNormalization.NormalizeCodePoint('「'));
Assert.Equal('」', CommonNormalization.NormalizeCodePoint('」'));
Assert.Equal('、', CommonNormalization.NormalizeCodePoint('、'));
Assert.Equal('・', CommonNormalization.NormalizeCodePoint('・'));
}
}
public sealed class NormalizeCodePoint_LowercasesRegularAsciiTest : NeedleTestBase
{
[Fact]
public void Execute()
{
Assert.Equal('a', CommonNormalization.NormalizeCodePoint('A'));
Assert.Equal('b', CommonNormalization.NormalizeCodePoint('B'));
Assert.Equal('c', CommonNormalization.NormalizeCodePoint('C'));
}
}
#endregion
@@ -0,0 +1,91 @@
using MaigoLabs.NeedLe.Indexer;
using MaigoLabs.NeedLe.Searcher;
namespace MaigoLabs.NeedLe.Tests.E2E;
public sealed class Search_MatchesWithMixedSearchQueryTest : NeedleTestBase
{
private static readonly string[] TestDocuments =
[
"ミーティア",
"エンドマークに希望と涙を添えて",
"宵の鳥",
"僕の和風本当上手",
];
[Fact]
public void Execute()
{
var compressed = InvertedIndexBuilder.BuildInvertedIndex(TestDocuments, TokenizerOptions);
var invertedIndex = InvertedIndexLoader.Load(compressed);
var results = InvertedIndexSearcher.Search(invertedIndex, "bokunoh风じょう");
// Should have at least one result
Assert.NotEmpty(results);
// The first result should be "僕の和風本当上手"
Assert.Equal("僕の和風本当上手", results[0].DocumentText);
}
}
public sealed class Search_HighlightsSearchResultCorrectlyTest : NeedleTestBase
{
private static readonly string[] TestDocuments =
[
"ミーティア",
"エンドマークに希望と涙を添えて",
"宵の鳥",
"僕の和風本当上手",
];
[Fact]
public void Execute()
{
var compressed = InvertedIndexBuilder.BuildInvertedIndex(TestDocuments, TokenizerOptions);
var invertedIndex = InvertedIndexLoader.Load(compressed);
var results = InvertedIndexSearcher.Search(invertedIndex, "bokunoh风じょう");
Assert.NotEmpty(results);
var highlighted = SearchResultHighlighter.Highlight(results[0]);
// Should be a list of parts
Assert.NotEmpty(highlighted);
// Collect highlighted text
var highlightedTexts = highlighted.Where(p => p.IsHighlighted).Select(p => p.Text).ToList();
var highlightedJoined = string.Join("", highlightedTexts);
Assert.Contains("僕", highlightedJoined);
Assert.Contains("の", highlightedJoined);
Assert.Contains("和", highlightedJoined);
Assert.Contains("風", highlightedJoined);
Assert.Contains("上", highlightedJoined);
}
}
public sealed class Search_MatchesRomajiInputToKanaDocumentsTest : NeedleTestBase
{
private static readonly string[] TestDocuments =
[
"ミーティア",
"エンドマークに希望と涙を添えて",
"宵の鳥",
"僕の和風本当上手",
];
[Fact]
public void Execute()
{
var compressed = InvertedIndexBuilder.BuildInvertedIndex(TestDocuments, TokenizerOptions);
var invertedIndex = InvertedIndexLoader.Load(compressed);
// Search for "yoi" should match "宵の鳥"
var results = InvertedIndexSearcher.Search(invertedIndex, "yoi");
var matchedTexts = results.Select(r => r.DocumentText).ToList();
Assert.Contains("宵の鳥", matchedTexts);
}
}
@@ -0,0 +1,143 @@
using MaigoLabs.NeedLe.Common;
using MaigoLabs.NeedLe.Common.Extensions;
using MaigoLabs.NeedLe.Indexer.Trie;
using MaigoLabs.NeedLe.Searcher.Trie;
namespace MaigoLabs.NeedLe.Tests.E2E;
#region Trie Building
public sealed class TrieBuilding_BuildsTrieWithMultipleDifferentTokensTest : NeedleTestBase
{
[Fact]
public void Execute()
{
var trie = TrieBuilder.BuildTrie([
(0, "hello".ToCodePoints()),
(1, "help".ToCodePoints()),
(2, "world".ToCodePoints()),
(3, "word".ToCodePoints()),
]);
// Traverse to verify structure
var helloNode = trie.Traverse("hello".ToCodePoints().ToArray());
var helpNode = trie.Traverse("help".ToCodePoints().ToArray());
var worldNode = trie.Traverse("world".ToCodePoints().ToArray());
var wordNode = trie.Traverse("word".ToCodePoints().ToArray());
Assert.NotNull(helloNode);
Assert.NotNull(helpNode);
Assert.NotNull(worldNode);
Assert.NotNull(wordNode);
// Check token IDs
Assert.Contains(0, helloNode!.TokenIds);
Assert.Contains(1, helpNode!.TokenIds);
Assert.Contains(2, worldNode!.TokenIds);
Assert.Contains(3, wordNode!.TokenIds);
// Check that 'hel' prefix node has both tokens in subTree
var helNode = trie.Traverse("hel".ToCodePoints().ToArray());
Assert.NotNull(helNode);
Assert.Contains(0, helNode!.SubTreeTokenIds);
Assert.Contains(1, helNode.SubTreeTokenIds);
}
}
public sealed class TrieBuilding_HandlesJapaneseTextTokensTest : NeedleTestBase
{
[Fact]
public void Execute()
{
var trie = TrieBuilder.BuildTrie([
(0, "さくら".ToCodePoints()),
(1, "サクラ".ToCodePoints()),
(2, "桜".ToCodePoints()),
]);
Assert.Contains(0, trie.Traverse("さくら".ToCodePoints().ToArray())?.TokenIds ?? []);
Assert.Contains(1, trie.Traverse("サクラ".ToCodePoints().ToArray())?.TokenIds ?? []);
Assert.Contains(2, trie.Traverse("桜".ToCodePoints().ToArray())?.TokenIds ?? []);
}
}
#endregion
#region Trie Serialization
public sealed class TrieSerialization_SerializesAndDeserializesCorrectlyTest : NeedleTestBase
{
[Fact]
public void Execute()
{
var originalTrie = TrieBuilder.BuildTrie([
(0, "apple".ToCodePoints()),
(1, "app".ToCodePoints()),
(2, "banana".ToCodePoints()),
]);
// Serialize
var serialized = TrieSerializer.Serialize(originalTrie);
Assert.True(serialized.Length > 0);
// Deserialize
var deserialized = TrieDeserializer.Deserialize(serialized);
var deserializedTrie = deserialized.Root;
var tokenCodePoints = deserialized.TokenCodePoints;
// Verify structure is preserved
var appleNode = deserializedTrie.Traverse("apple".ToCodePoints().ToArray());
var appNode = deserializedTrie.Traverse("app".ToCodePoints().ToArray());
var bananaNode = deserializedTrie.Traverse("banana".ToCodePoints().ToArray());
Assert.NotNull(appleNode);
Assert.NotNull(appNode);
Assert.NotNull(bananaNode);
Assert.Contains(0, appleNode!.TokenIds);
Assert.Contains(1, appNode!.TokenIds);
Assert.Contains(2, bananaNode!.TokenIds);
// Verify tokenCodePoints map
Assert.Equal("apple", tokenCodePoints[0].ToUtf32String());
Assert.Equal("app", tokenCodePoints[1].ToUtf32String());
Assert.Equal("banana", tokenCodePoints[2].ToUtf32String());
// Verify subTreeTokenIds are reconstructed
Assert.Contains(0, appNode.SubTreeTokenIds);
Assert.Contains(1, appNode.SubTreeTokenIds);
}
}
public sealed class TrieSerialization_PreservesParentReferencesAfterDeserializationTest : NeedleTestBase
{
[Fact]
public void Execute()
{
var originalTrie = TrieBuilder.BuildTrie([
(0, "test".ToCodePoints()),
]);
var serialized = TrieSerializer.Serialize(originalTrie);
var deserialized = TrieDeserializer.Deserialize(serialized);
var root = deserialized.Root;
var testNode = root.Traverse("test".ToCodePoints().ToArray());
Assert.NotNull(testNode);
// Walk back to root via parent references
TrieNode? node = testNode;
var depth = 0;
while (node?.Parent != null)
{
node = node.Parent;
depth++;
}
Assert.Equal(4, depth); // 't' -> 'e' -> 's' -> 't' -> root
Assert.Same(root, node);
}
}
#endregion
@@ -0,0 +1,75 @@
using MaigoLabs.NeedLe.Indexer.Han;
namespace MaigoLabs.NeedLe.Tests.Indexer.Han;
#region IsHanCharacter
public sealed class IsHanCharacter_ReturnsTrueForCjkCharactersTest : NeedleTestBase
{
[Fact]
public void Execute()
{
Assert.True(HanVariantProvider.IsHanCharacter('中'));
Assert.True(HanVariantProvider.IsHanCharacter('国'));
Assert.True(HanVariantProvider.IsHanCharacter('日'));
Assert.True(HanVariantProvider.IsHanCharacter('本'));
}
}
public sealed class IsHanCharacter_ReturnsFalseForNonCjkCharactersTest : NeedleTestBase
{
[Fact]
public void Execute()
{
Assert.False(HanVariantProvider.IsHanCharacter('a'));
Assert.False(HanVariantProvider.IsHanCharacter('あ'));
Assert.False(HanVariantProvider.IsHanCharacter('ア'));
Assert.False(HanVariantProvider.IsHanCharacter('1'));
}
}
#endregion
#region GetHanVariants
public sealed class GetHanVariants_ReturnsVariantsForSimplifiedTraditionalTest : NeedleTestBase
{
[Fact]
public void Execute()
{
var provider = new HanVariantProvider();
// 国 (simplified) and 國 (traditional) should be variants of each other
var variants1 = provider.GetHanVariants('国');
var variants2 = provider.GetHanVariants('國');
Assert.Contains('国', variants1);
Assert.Contains('國', variants1);
Assert.Contains('国', variants2);
Assert.Contains('國', variants2);
}
}
public sealed class GetHanVariants_ReturnsCharacterItselfForNoVariantsTest : NeedleTestBase
{
[Fact]
public void Execute()
{
var provider = new HanVariantProvider();
var variants = provider.GetHanVariants('一');
Assert.Contains('一', variants);
}
}
public sealed class GetHanVariants_ReturnsEmptyForNonHanCharactersTest : NeedleTestBase
{
[Fact]
public void Execute()
{
var provider = new HanVariantProvider();
Assert.Empty(provider.GetHanVariants('a'));
Assert.Empty(provider.GetHanVariants('あ'));
}
}
#endregion
@@ -0,0 +1,51 @@
using MaigoLabs.NeedLe.Indexer.Han;
namespace MaigoLabs.NeedLe.Tests.Indexer.Han;
public sealed class GetPinyinCandidates_ReturnsPinyinForHanCharacterTest : NeedleTestBase
{
[Fact]
public void Execute()
{
var candidates = PinyinHelper.GetPinyinCandidates('中').ToList();
Assert.Contains("zhong", candidates);
Assert.Contains("zh", candidates); // initial
Assert.Contains("z", candidates); // first letter
}
}
public sealed class GetPinyinCandidates_ReturnsMultiplePinyinForPolyphonicTest : NeedleTestBase
{
[Fact]
public void Execute()
{
// 行 can be "xing" or "hang"
var candidates = PinyinHelper.GetPinyinCandidates('行').ToList();
Assert.Contains("xing", candidates);
Assert.Contains("hang", candidates);
}
}
public sealed class GetPinyinCandidates_IncludesFuzzyPinyinVariantsTest : NeedleTestBase
{
[Fact]
public void Execute()
{
// 风 is "feng", should also have fuzzy variant "fen"
var candidates = PinyinHelper.GetPinyinCandidates('风').ToList();
Assert.Contains("feng", candidates);
Assert.Contains("fen", candidates); // fuzzy: eng -> en
}
}
public sealed class GetPinyinCandidates_ReturnsEmptyForNonHanCharactersTest : NeedleTestBase
{
[Fact]
public void Execute()
{
Assert.Empty(PinyinHelper.GetPinyinCandidates('a'));
Assert.Empty(PinyinHelper.GetPinyinCandidates('あ'));
}
}
@@ -0,0 +1,59 @@
using MaigoLabs.NeedLe.Indexer.Han;
namespace MaigoLabs.NeedLe.Tests.Indexer.Han;
public sealed class UnionFindSet_FindsSelfAsRootInitiallyTest : NeedleTestBase
{
[Fact]
public void Execute()
{
var ufs = new UnionFindSet();
Assert.Equal(1, ufs.Find(1));
Assert.Equal(2, ufs.Find(2));
}
}
public sealed class UnionFindSet_UnionsTwoElementsTest : NeedleTestBase
{
[Fact]
public void Execute()
{
var ufs = new UnionFindSet();
ufs.Union(1, 2);
Assert.Equal(ufs.Find(1), ufs.Find(2));
}
}
public sealed class UnionFindSet_UnionsMultipleElementsTransitivelyTest : NeedleTestBase
{
[Fact]
public void Execute()
{
var ufs = new UnionFindSet();
ufs.Union(1, 2);
ufs.Union(2, 3);
ufs.Union(4, 5);
Assert.Equal(ufs.Find(1), ufs.Find(3));
Assert.NotEqual(ufs.Find(1), ufs.Find(4));
ufs.Union(3, 4);
Assert.Equal(ufs.Find(1), ufs.Find(5));
}
}
public sealed class UnionFindSet_IteratesAllKeysTest : NeedleTestBase
{
[Fact]
public void Execute()
{
var ufs = new UnionFindSet();
ufs.Union(1, 2);
ufs.Union(3, 4);
var keys = ufs.Keys.ToList();
Assert.Contains(1, keys);
Assert.Contains(2, keys);
Assert.Contains(3, keys);
Assert.Contains(4, keys);
}
}
@@ -0,0 +1,69 @@
using MaigoLabs.NeedLe.Indexer.Japanese;
namespace MaigoLabs.NeedLe.Tests.Indexer.Japanese;
#region ToRomajiStrictly
public sealed class ToRomajiStrictly_ConvertsBasicKanaToRomajiTest : NeedleTestBase
{
[Fact]
public void Execute()
{
Assert.Equal("a", JapaneseUtils.ToRomajiStrictly("あ"));
Assert.Equal("ka", JapaneseUtils.ToRomajiStrictly("か"));
Assert.Equal("sakura", JapaneseUtils.ToRomajiStrictly("さくら"));
}
}
public sealed class ToRomajiStrictly_ConvertsKatakanaToRomajiTest : NeedleTestBase
{
[Fact]
public void Execute()
{
Assert.Equal("a", JapaneseUtils.ToRomajiStrictly("ア"));
Assert.Equal("ka", JapaneseUtils.ToRomajiStrictly("カ"));
Assert.Equal("sakura", JapaneseUtils.ToRomajiStrictly("サクラ"));
}
}
public sealed class ToRomajiStrictly_HandlesLongVowelsTest : NeedleTestBase
{
[Fact]
public void Execute()
{
Assert.Equal("ou", JapaneseUtils.ToRomajiStrictly("おう"));
Assert.Equal("oo", JapaneseUtils.ToRomajiStrictly("おお"));
}
}
public sealed class ToRomajiStrictly_ReturnsEmptyForInvalidFirstCharacterTest : NeedleTestBase
{
[Fact]
public void Execute()
{
Assert.Equal("", JapaneseUtils.ToRomajiStrictly("ー")); // prolonged sound mark cannot be first
Assert.Equal("", JapaneseUtils.ToRomajiStrictly("ゃ")); // small ya cannot be first
}
}
public sealed class ToRomajiStrictly_ReturnsEmptyForInvalidLastCharacterTest : NeedleTestBase
{
[Fact]
public void Execute()
{
Assert.Equal("", JapaneseUtils.ToRomajiStrictly("っ")); // small tsu cannot be last
}
}
public sealed class ToRomajiStrictly_HandlesGeminationTest : NeedleTestBase
{
[Fact]
public void Execute()
{
Assert.Equal("katta", JapaneseUtils.ToRomajiStrictly("かった"));
}
}
#endregion
@@ -0,0 +1,40 @@
using MaigoLabs.NeedLe.Indexer.Japanese;
namespace MaigoLabs.NeedLe.Tests.Indexer.Japanese;
public sealed class GetAllKanaReadings_ReturnsKatakanaForPureKanaInputTest : NeedleTestBase
{
[Fact]
public void Execute()
{
var provider = new TranscriptionProvider();
var readings = provider.GetAllKanaReadings("あ");
Assert.Contains("ア", readings);
}
}
public sealed class GetAllKanaReadings_ReturnsReadingsForKanjiTest : NeedleTestBase
{
[Fact]
public void Execute()
{
var provider = new TranscriptionProvider();
var readings = provider.GetAllKanaReadings("僕");
Assert.NotEmpty(readings);
// 僕 should have reading ボク
Assert.Contains("ボク", readings);
}
}
public sealed class GetAllKanaReadings_ReturnsReadingsForCompoundWordsTest : NeedleTestBase
{
[Fact]
public void Execute()
{
var provider = new TranscriptionProvider();
var readings = provider.GetAllKanaReadings("和風");
Assert.NotEmpty(readings);
}
}
@@ -0,0 +1,165 @@
using MaigoLabs.NeedLe.Common.Types;
using MaigoLabs.NeedLe.Indexer;
namespace MaigoLabs.NeedLe.Tests.Indexer;
public sealed class Tokenizer_TokenizesMixedJapaneseTextTest : NeedleTestBase
{
[Fact]
public void Execute()
{
var tokenizer = new Tokenizer(TokenizerOptions);
var tokens = tokenizer.Tokenize("僕の和風本当上手");
var tokenDefs = tokenizer.Tokens.Values.ToList();
// Should have tokens of various types
var types = tokenDefs.Select(t => t.Type).ToHashSet();
Assert.Contains(TokenType.Han, types);
Assert.Contains(TokenType.Pinyin, types);
Assert.Contains(TokenType.Kana, types);
Assert.Contains(TokenType.Romaji, types);
// Helper to get token texts at a specific position by type
List<string> GetTokenTextsAt(int pos, TokenType type) => tokens
.Where(t => t.Start <= pos && t.End > pos)
.Select(t => tokenDefs.First(d => d.Id == t.Id))
.Where(d => d.Type == type)
.Select(d => d.Text)
.ToList();
// Position 0: 僕
Assert.Contains("僕", GetTokenTextsAt(0, TokenType.Han));
Assert.Contains("pu", GetTokenTextsAt(0, TokenType.Pinyin));
Assert.Contains("ボク", GetTokenTextsAt(0, TokenType.Kana));
Assert.Contains("boku", GetTokenTextsAt(0, TokenType.Romaji));
// Position 1: の (hiragana, no Han/Pinyin)
Assert.Empty(GetTokenTextsAt(1, TokenType.Han));
Assert.Empty(GetTokenTextsAt(1, TokenType.Pinyin));
Assert.Contains("", GetTokenTextsAt(1, TokenType.Kana));
Assert.Contains("no", GetTokenTextsAt(1, TokenType.Romaji));
// Position 2: 和
Assert.Contains("和", GetTokenTextsAt(2, TokenType.Han));
Assert.Contains("he", GetTokenTextsAt(2, TokenType.Pinyin));
Assert.Contains("ワ", GetTokenTextsAt(2, TokenType.Kana));
Assert.Contains("wa", GetTokenTextsAt(2, TokenType.Romaji));
// Position 3: 風
Assert.Contains("風", GetTokenTextsAt(3, TokenType.Han));
Assert.Contains("风", GetTokenTextsAt(3, TokenType.Han)); // simplified variant
Assert.Contains("feng", GetTokenTextsAt(3, TokenType.Pinyin));
Assert.Contains("フウ", GetTokenTextsAt(3, TokenType.Kana));
Assert.Contains("fu", GetTokenTextsAt(3, TokenType.Romaji));
// Position 4: 本
Assert.Contains("本", GetTokenTextsAt(4, TokenType.Han));
Assert.Contains("ben", GetTokenTextsAt(4, TokenType.Pinyin));
Assert.Contains("ホン", GetTokenTextsAt(4, TokenType.Kana));
Assert.Contains("hon", GetTokenTextsAt(4, TokenType.Romaji));
// Position 5: 当
Assert.Contains("当", GetTokenTextsAt(5, TokenType.Han));
Assert.Contains("當", GetTokenTextsAt(5, TokenType.Han)); // traditional variant
Assert.Contains("dang", GetTokenTextsAt(5, TokenType.Pinyin));
Assert.Contains("トウ", GetTokenTextsAt(5, TokenType.Kana));
Assert.Contains("to", GetTokenTextsAt(5, TokenType.Romaji)); // normalized: tou -> to
// Position 6: 上
Assert.Contains("上", GetTokenTextsAt(6, TokenType.Han));
Assert.Contains("shang", GetTokenTextsAt(6, TokenType.Pinyin));
Assert.Contains("ジョウ", GetTokenTextsAt(6, TokenType.Kana));
Assert.Contains("jo", GetTokenTextsAt(6, TokenType.Romaji)); // normalized: jou -> jo
// Position 7: 手
Assert.Contains("手", GetTokenTextsAt(7, TokenType.Han));
Assert.Contains("shou", GetTokenTextsAt(7, TokenType.Pinyin));
Assert.Contains("シュ", GetTokenTextsAt(7, TokenType.Kana));
Assert.Contains("shu", GetTokenTextsAt(7, TokenType.Romaji));
}
}
public sealed class Tokenizer_NoDuplicateTokensTest : NeedleTestBase
{
[Fact]
public void Execute()
{
var tokenizer = new Tokenizer(TokenizerOptions);
// Tokenize multiple music names that share some characters
tokenizer.Tokenize("僕の和風本当上手");
tokenizer.Tokenize("僕");
tokenizer.Tokenize("和風");
// Check that there are no duplicate tokens
var tokenDefs = tokenizer.Tokens.Values.ToList();
var tokenKeys = tokenDefs.Select(t => $"{t.Type}:{t.Text}").ToList();
var uniqueKeys = tokenKeys.ToHashSet();
Assert.Equal(uniqueKeys.Count, tokenKeys.Count);
// Also check that IDs are unique
var ids = tokenDefs.Select(t => t.Id).ToList();
var uniqueIds = ids.ToHashSet();
Assert.Equal(uniqueIds.Count, ids.Count);
}
}
public sealed class Tokenizer_HandlesRawTokensForNonCjkTest : NeedleTestBase
{
[Fact]
public void Execute()
{
var tokenizer = new Tokenizer(TokenizerOptions);
tokenizer.Tokenize("a-b");
var tokenDefs = tokenizer.Tokens.Values.ToList();
var rawTokenTexts = tokenDefs.Where(t => t.Type == TokenType.Raw).Select(t => t.Text).ToList();
Assert.Contains("a", rawTokenTexts);
Assert.Contains("-", rawTokenTexts);
Assert.Contains("b", rawTokenTexts);
}
}
public sealed class Tokenizer_TokenizesCompoundWordKyouTest : NeedleTestBase
{
[Fact]
public void Execute()
{
var tokenizer = new Tokenizer(TokenizerOptions);
var tokens = tokenizer.Tokenize("今日");
var tokenDefs = tokenizer.Tokens.Values.ToList();
// Helper to get tokens with specific type and span
List<string> GetTokensWithSpan(TokenType type, int start, int end) => tokens
.Where(t => t.Start == start && t.End == end)
.Select(t => tokenDefs.First(d => d.Id == t.Id))
.Where(d => d.Type == type)
.Select(d => d.Text)
.ToList();
// Individual character readings at position 0: 今
Assert.Contains("今", GetTokensWithSpan(TokenType.Han, 0, 1));
Assert.Contains("jin", GetTokensWithSpan(TokenType.Pinyin, 0, 1));
Assert.Contains("コン", GetTokensWithSpan(TokenType.Kana, 0, 1));
Assert.Contains("イマ", GetTokensWithSpan(TokenType.Kana, 0, 1));
Assert.Contains("kon", GetTokensWithSpan(TokenType.Romaji, 0, 1));
Assert.Contains("ima", GetTokensWithSpan(TokenType.Romaji, 0, 1));
// Individual character readings at position 1: 日
Assert.Contains("日", GetTokensWithSpan(TokenType.Han, 1, 2));
Assert.Contains("ri", GetTokensWithSpan(TokenType.Pinyin, 1, 2));
Assert.Contains("ニチ", GetTokensWithSpan(TokenType.Kana, 1, 2));
Assert.Contains("ヒ", GetTokensWithSpan(TokenType.Kana, 1, 2));
Assert.Contains("niti", GetTokensWithSpan(TokenType.Romaji, 1, 2));
Assert.Contains("hi", GetTokensWithSpan(TokenType.Romaji, 1, 2));
// Combined reading for "今日" [0, 2] - this is an indivisible compound word
Assert.Contains("キョウ", GetTokensWithSpan(TokenType.Kana, 0, 2));
Assert.Contains("kyo", GetTokensWithSpan(TokenType.Romaji, 0, 2)); // normalized: kyou -> kyo
}
}
@@ -0,0 +1,66 @@
using MaigoLabs.NeedLe.Common;
using MaigoLabs.NeedLe.Common.Extensions;
using MaigoLabs.NeedLe.Indexer.Trie;
namespace MaigoLabs.NeedLe.Tests.Indexer;
#region GraftTriePaths
public sealed class GraftTriePaths_GraftsPathsAccordingToNormalizationRulesTest : NeedleTestBase
{
[Fact]
public void Execute()
{
// Build a trie with tokens containing normalized forms
var trie = TrieBuilder.BuildTrie([
(0, "sya".ToCodePoints()), // normalized form of "sha"
(1, "tu".ToCodePoints()), // normalized form of "tsu"
]);
// Graft paths so that "sha" -> "sya" and "tsu" -> "tu"
TrieBuilder.GraftTriePaths(trie, [
("sha".ToCodePoints().ToArray(), "sya".ToCodePoints().ToArray()),
("tsu".ToCodePoints().ToArray(), "tu".ToCodePoints().ToArray()),
]);
// Now we should be able to traverse using both the original and grafted paths
var syaNode = trie.Traverse("sya".ToCodePoints().ToArray());
var shaNode = trie.Traverse("sha".ToCodePoints().ToArray());
Assert.NotNull(syaNode);
Assert.NotNull(shaNode);
Assert.Same(syaNode, shaNode); // Both paths should lead to the same node
var tuNode = trie.Traverse("tu".ToCodePoints().ToArray());
var tsuNode = trie.Traverse("tsu".ToCodePoints().ToArray());
Assert.NotNull(tuNode);
Assert.NotNull(tsuNode);
Assert.Same(tuNode, tsuNode);
}
}
public sealed class GraftTriePaths_HandlesChainedGraftRulesTest : NeedleTestBase
{
[Fact]
public void Execute()
{
var trie = TrieBuilder.BuildTrie([
(0, "o".ToCodePoints()), // normalized vowel
]);
// Chain: "ou" -> "o", "oo" -> "o"
TrieBuilder.GraftTriePaths(trie, [
("ou".ToCodePoints().ToArray(), "o".ToCodePoints().ToArray()),
("oo".ToCodePoints().ToArray(), "o".ToCodePoints().ToArray()),
]);
var oNode = trie.Traverse("o".ToCodePoints().ToArray());
var ouNode = trie.Traverse("ou".ToCodePoints().ToArray());
var ooNode = trie.Traverse("oo".ToCodePoints().ToArray());
Assert.NotNull(oNode);
Assert.Same(oNode, ouNode);
Assert.Same(oNode, ooNode);
}
}
#endregion
@@ -0,0 +1,28 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<RootNamespace>$(ProjectName).Tests</RootNamespace>
<AssemblyName>$(RootNamespace)</AssemblyName>
<IsPackable>false</IsPackable>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="coverlet.collector" />
<PackageReference Include="Microsoft.NET.Test.Sdk" />
<PackageReference Include="xunit" />
<PackageReference Include="xunit.runner.visualstudio" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\MaigoLabs.NeedLe.Common\MaigoLabs.NeedLe.Common.csproj" />
<ProjectReference Include="..\MaigoLabs.NeedLe.Indexer\MaigoLabs.NeedLe.Indexer.csproj" />
<ProjectReference Include="..\MaigoLabs.NeedLe.Searcher\MaigoLabs.NeedLe.Searcher.csproj" />
</ItemGroup>
<ItemGroup>
<Using Include="Xunit" />
</ItemGroup>
</Project>
@@ -0,0 +1,12 @@
using MaigoLabs.NeedLe.Indexer;
using MaigoLabs.NeedLe.Indexer.Han;
using MaigoLabs.NeedLe.Indexer.Japanese;
namespace MaigoLabs.NeedLe.Tests;
public abstract class NeedleTestBase
{
public static HanVariantProvider HanVariantProvider { get; set; } = new();
public static TranscriptionProvider TranscriptionProvider { get; set; } = new();
public static TokenizerOptions TokenizerOptions => new() { HanVariantProvider = HanVariantProvider, TranscriptionProvider = TranscriptionProvider };
}
+16
View File
@@ -0,0 +1,16 @@
<Solution>
<Configurations>
<Platform Name="Any CPU" />
<Platform Name="x64" />
<Platform Name="x86" />
</Configurations>
<Project Path="MaigoLabs.NeedLe/MaigoLabs.NeedLe.csproj" />
<Project Path="MaigoLabs.NeedLe.Common/MaigoLabs.NeedLe.Common.csproj" />
<Project Path="MaigoLabs.NeedLe.Indexer/MaigoLabs.NeedLe.Indexer.csproj" />
<Project Path="MaigoLabs.NeedLe.Searcher/MaigoLabs.NeedLe.Searcher.csproj" />
<Project Path="MaigoLabs.NeedLe.Playground/MaigoLabs.NeedLe.Playground.csproj" />
<Project Path="MaigoLabs.NeedLe.Tests/MaigoLabs.NeedLe.Tests.csproj" />
</Solution>
@@ -0,0 +1,35 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>Library</OutputType>
<RootNamespace>$(ProjectName)</RootNamespace>
<AssemblyName>$(RootNamespace)</AssemblyName>
</PropertyGroup>
<PropertyGroup>
<IsPackable>true</IsPackable>
<PackageId>$(RootNamespace)</PackageId>
<IncludeBuildOutput>false</IncludeBuildOutput>
<IncludeContentInPack>false</IncludeContentInPack>
<NoPackageAnalysis>true</NoPackageAnalysis>
<MeCabUseDefaultDictionary>False</MeCabUseDefaultDictionary>
<PackageReadmeFile></PackageReadmeFile>
</PropertyGroup>
<ItemGroup>
<ProjectReference Include="..\MaigoLabs.NeedLe.Common\MaigoLabs.NeedLe.Common.csproj" PrivateAssets="none" />
<ProjectReference Include="..\MaigoLabs.NeedLe.Indexer\MaigoLabs.NeedLe.Indexer.csproj" PrivateAssets="none" />
<ProjectReference Include="..\MaigoLabs.NeedLe.Searcher\MaigoLabs.NeedLe.Searcher.csproj" PrivateAssets="none" />
</ItemGroup>
<!-- Directly add README to package files -->
<Target Name="AddReadmeToPackage" BeforeTargets="GenerateNuspec">
<ItemGroup>
<_PackageFiles Include="..\README.md" PackagePath="/" />
</ItemGroup>
<PropertyGroup>
<PackageReadmeFile>README.md</PackageReadmeFile>
</PropertyGroup>
</Target>
</Project>
+57
View File
@@ -0,0 +1,57 @@
# `MaigoLabs.NeedLe`
Fuzzy search engine for small text pieces, with Chinese/Japanese pronunciation support.
See also the [in-browser demo](https://needle.maigo.dev) (built with the TypeScript version, which provides the same features as the C# packages).
## Install
```bash
dotnet add package MaigoLabs.NeedLe
```
Or install sub-packages separately:
```bash
dotnet add package MaigoLabs.NeedLe.Indexer # For building indexes
dotnet add package MaigoLabs.NeedLe.Searcher # For searching only
```
## Usage
### Indexing
Indexing requires dictionaries. These are installed as dependencies of the `MaigoLabs.NeedLe.Indexer` package:
* MeCab.DotNet
* OpenccNetLib
* hyjiacan.pinyin4net
```csharp
using MaigoLabs.NeedLe.Indexer;
var documents = new[] { "你好世界", "こんにちは" };
var compressedIndex = InvertedIndexBuilder.BuildInvertedIndex(documents);
// To customize dictionary paths, pass a `TokenizerOptions` instance as the second argument of `BuildInvertedIndex`.
// The built index can be stored for later use, or sent to the frontend and loaded with the TypeScript package `@maigolabs/needle`.
// For compatibility with .NET Standard, we don't provide JSON-related methods; use any JSON library (e.g. System.Text.Json) to serialize/deserialize the index in the way you prefer.
var json = JsonSerializer.Serialize(compressedIndex);
```
### Searching
Searching requires a prebuilt index but doesn't require dictionaries. The searcher is a lightweight package without dependencies.
```csharp
using MaigoLabs.NeedLe.Searcher;
// Index returned by `BuildInvertedIndex`.
var index = InvertedIndexLoader.Load(compressedIndex);
var results = InvertedIndexSearcher.Search(index, "sekai");
foreach (var result in results) Console.WriteLine($"{result.DocumentText} ({result.MatchRatio:P0})");
// → 你好世界 (50%)
```
To highlight the search result, see also `SearchResultHighlighter`.
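Below is a minimal sketch, assuming the `results` array from the searching example above; `Highlight` returns a list of `HighlightedTextPart` values, each with a `Text` fragment and an `IsHighlighted` flag:
```csharp
using MaigoLabs.NeedLe.Searcher;
// Wrap matched fragments in brackets for display.
var parts = SearchResultHighlighter.Highlight(results[0]);
foreach (var part in parts)
    Console.Write(part.IsHighlighted ? $"[{part.Text}]" : part.Text);
// e.g. → 你好[世界]
```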
+157
View File
@@ -0,0 +1,157 @@
import tsParser from '@typescript-eslint/parser';
import tsPlugin from '@typescript-eslint/eslint-plugin';
import importPlugin from 'eslint-plugin-import';
import stylisticPlugin from '@stylistic/eslint-plugin';
import type { Linter } from 'eslint';
const commonConfig: Linter.Config = {
plugins: {
import: importPlugin,
'@typescript-eslint': tsPlugin as any,
stylistic: stylisticPlugin,
},
rules: {
'import/order': [
'error',
{
groups: ['builtin', 'external', ['internal', 'parent', 'sibling', 'index']],
pathGroups: [
{
pattern: '@proj-marina/**',
group: 'internal',
position: 'before',
},
{
pattern: '@/**',
group: 'internal',
position: 'before',
},
],
'newlines-between': 'always',
distinctGroup: false,
alphabetize: {
order: 'asc',
caseInsensitive: true,
},
},
],
'import/no-duplicates': 'error',
'@typescript-eslint/no-unused-vars': ['error', { argsIgnorePattern: '^_' }],
'prefer-const': 'error',
'no-var': 'error',
'no-debugger': 'error',
'object-shorthand': 'error',
'prefer-template': 'error',
eqeqeq: ['error', 'always', { null: 'ignore' }],
'@typescript-eslint/prefer-optional-chain': 'error',
'@typescript-eslint/prefer-nullish-coalescing': 'error',
'@typescript-eslint/return-await': ['error', 'always'],
'@typescript-eslint/no-floating-promises': 'error',
'@typescript-eslint/await-thenable': 'error',
'@typescript-eslint/no-misused-promises': ['error'],
'@typescript-eslint/prefer-as-const': 'error',
'@typescript-eslint/prefer-for-of': 'error',
'@typescript-eslint/prefer-includes': 'error',
'@typescript-eslint/prefer-string-starts-ends-with': 'error',
'@typescript-eslint/consistent-type-imports': ['error', { disallowTypeAnnotations: false }],
'stylistic/indent': ['error', 2, {
'offsetTernaryExpressions': true
}],
'stylistic/linebreak-style': ['error', 'unix'],
'stylistic/semi': ['error', 'always'],
'stylistic/quotes': ['error', 'single', {
'avoidEscape': true,
'allowTemplateLiterals': 'avoidEscape',
}],
'stylistic/comma-dangle': ['error', 'always-multiline'],
'stylistic/arrow-parens': ['error', 'as-needed'],
'stylistic/object-curly-spacing': ['error', 'always'],
'stylistic/array-bracket-spacing': ['error', 'never'],
'stylistic/space-before-function-paren': ['error', {
'anonymous': 'always',
'named': 'never',
'asyncArrow': 'always',
}],
'stylistic/space-in-parens': ['error', 'never'],
'stylistic/comma-spacing': ['error', { 'before': false, 'after': true }],
'stylistic/key-spacing': ['error', { 'beforeColon': false, 'afterColon': true }],
'stylistic/keyword-spacing': ['error'],
'stylistic/space-before-blocks': ['error', 'always'],
'stylistic/space-infix-ops': ['error'],
'stylistic/no-trailing-spaces': ['error'],
'stylistic/eol-last': ['error', 'always'],
'stylistic/no-multiple-empty-lines': ['error', { 'max': 1, 'maxEOF': 0 }],
'stylistic/brace-style': ['error', '1tbs', { 'allowSingleLine': true }],
'stylistic/object-curly-newline': ['error', {
'ObjectExpression': { 'multiline': true, 'consistent': true },
'ObjectPattern': { 'multiline': true, 'consistent': true },
'ImportDeclaration': { 'multiline': true, 'consistent': true },
'ExportDeclaration': { 'multiline': true, 'consistent': true }
}],
'stylistic/array-bracket-newline': ['error', 'consistent'],
'stylistic/function-paren-newline': ['error', 'consistent'],
'stylistic/member-delimiter-style': ['error', {
'multiline': {
'delimiter': 'semi',
'requireLast': true
},
'singleline': {
'delimiter': 'semi',
'requireLast': false
}
}],
'stylistic/type-annotation-spacing': ['error'],
'stylistic/jsx-quotes': ['error', 'prefer-double'],
},
settings: {
'import/internal-regex': '^@proj-marina/',
'import/resolver': {
typescript: {
project: ['./apps/*/tsconfig.json', './packages/*/tsconfig.json'],
noWarnOnMultipleProjects: true,
},
},
},
};
const parserOptions: Linter.ParserOptions = {
parser: tsParser,
ecmaVersion: 'latest',
sourceType: 'module',
project: ['./apps/*/tsconfig.json', './packages/*/tsconfig.json'],
noWarnOnMultipleProjects: true,
};
const config: Linter.Config[] = [
{
...commonConfig,
files: ['**/*.{ts,tsx}'],
languageOptions: {
parser: tsParser,
ecmaVersion: 'latest',
sourceType: 'module',
parserOptions,
},
},
{
ignores: [
'**/node_modules/**',
// Build output
'**/dist/**',
'**/build/**',
'**/coverage/**',
'eslint.config.ts',
'**/uno.config.ts',
'**/vite.config.ts',
'**/jest.config.ts',
'**/tsdown.config.ts',
],
},
];
export default config;
+1585
View File
File diff suppressed because it is too large Load Diff
+36
View File
@@ -0,0 +1,36 @@
{
"name": "@maigolabs/needle-root",
"version": "1.0.0",
"type": "module",
"scripts": {
"build:packages": "pnpm -F=\"./packages/*\" run build",
"build:demo": "pnpm -F=\"./apps/demo\" build",
"dev:demo": "pnpm -F=\"./apps/demo\" dev",
"typecheck": "pnpm -rF=\"./packages/*\" -F=\"./apps/*\" typecheck",
"test": "pnpm -rF=\"./packages/*\" -F=\"./apps/*\" test",
"test:dotnet": "cd dotnet && dotnet test",
"lint": "eslint --cache --ext .",
"lint:fix": "eslint --cache --ext . --fix"
},
"license": "AGPL-3.0",
"packageManager": "pnpm@10.20.0",
"private": true,
"devDependencies": {
"@eslint/js": "^9.39.1",
"@stylistic/eslint-plugin": "^5.5.0",
"@typescript-eslint/eslint-plugin": "^8.46.3",
"@typescript-eslint/parser": "^8.46.3",
"cross-env": "^10.1.0",
"eslint": "^9.39.1",
"eslint-import-resolver-typescript": "^4.4.4",
"eslint-plugin-import": "^2.32.0",
"jiti": "^2.6.1",
"tsdown": "^0.18.4",
"tsx": "^4.21.0",
"typescript": "^5.9.3",
"unplugin-unused": "^0.5.6"
},
"dependencies": {
"@types/node": "^24.10.0"
}
}
+1
View File
@@ -0,0 +1 @@
../../LICENSE
+72
View File
@@ -0,0 +1,72 @@
# `@maigolabs/needle`
Fuzzy search engine for small text pieces, with Chinese/Japanese pronunciation support.
See also [in-browser demo](https://needle.maigo.dev).
## Install
Dictionaries are installed as dependencies of the package, but if you don't use the indexer, they could be tree-shaken when bundling.
```bash
pnpm install @maigolabs/needle
```
## Usage
### Indexing
NeedLe uses Kuromoji for Japanese tokenization, which loads dictionaries dynamically. You need to create a Kuromoji `TokenizerBuilder` first:
```ts
// In Node.js you can just load the dictionary from the file system.
import { TokenizerBuilder } from '@patdx/kuromoji';
import NodeDictionaryLoader from '@patdx/kuromoji/node';
const kuromojiDictPath = path.resolve(url.fileURLToPath(import.meta.resolve('@patdx/kuromoji')), '..', '..', 'dict');
const kuromoji = await new TokenizerBuilder({ loader: new NodeDictionaryLoader({ dic_path: kuromojiDictPath }) }).build();
// In the browser, you need to provide a custom loader that loads the dictionary files with fetch().
import { TokenizerBuilder } from '@patdx/kuromoji';
// You can load dict files from CDN (See also the README of https://github.com/patdx/kuromoji.js)
const kuromoji = await new TokenizerBuilder({
loader: {
loadArrayBuffer: async (url: string) => {
url = `https://cdn.jsdelivr.net/npm/@aiktb/kuromoji@1.0.2/dict/${url.replace('.gz', '')}`;
const res = await fetch(url);
if (!res.ok) throw new Error(`Failed to fetch ${url}`);
return await res.arrayBuffer();
},
},
}).build();
```
After creating the Kuromoji instance, you can build the inverted index:
```ts
import { buildInvertedIndex } from '@maigolabs/needle/indexer';
const documents = ['你好世界', 'こんにちは'];
const compressedIndex = buildInvertedIndex(documents, { kuromoji });
// The built index could be stored for later use.
const json = JSON.stringify(compressedIndex);
```
### Searching
If you only import the searcher in your frontend code, the indexer and its dictionary-related dependencies will be tree-shaken.
```ts
import { loadInvertedIndex, searchInvertedIndex } from '@maigolabs/needle/searcher';
const loadedIndex = loadInvertedIndex(compressedIndex);
const results = searchInvertedIndex(loadedIndex, 'sekai');
for (const result of results) console.log(`${result.documentText} (${(result.matchRatio * 100).toFixed(0)}%)`);
// → 你好世界 (50%)
```
To highlight the search result, see also `highlightSearchResult`.
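Below is a minimal sketch, assuming the `results` array from the example above; `highlightSearchResult` returns an array whose items are either plain strings (not highlighted) or objects with a `highlight` string:
```ts
import { highlightSearchResult } from '@maigolabs/needle/searcher';
// Wrap matched fragments in brackets for display.
const parts = highlightSearchResult(results[0]!);
const rendered = parts
  .map(part => (typeof part === 'string' ? part : `[${part.highlight}]`))
  .join('');
console.log(rendered); // e.g. 你好[世界]
```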
+18
View File
@@ -0,0 +1,18 @@
import type { Config } from 'jest';
const config: Config = {
preset: 'ts-jest/presets/default-esm',
testEnvironment: 'node',
extensionsToTreatAsEsm: ['.ts'],
moduleNameMapper: {
'^(\\.{1,2}/.*)\\.js$': '$1',
},
transform: {
'^.+\\.tsx?$': ['ts-jest', { useESM: true }],
},
testMatch: ['**/*.test.ts'],
testTimeout: 30000,
};
export default config;
+84
View File
@@ -0,0 +1,84 @@
{
"name": "@maigolabs/needle",
"version": "1.0.1",
"description": "Fuzzy search engine for small text pieces, with Chinese/Japanese pronunciation support.",
"type": "module",
"main": "./src/index.ts",
"scripts": {
"build": "tsdown",
"typecheck": "tsc",
"test": "cross-env NODE_OPTIONS=--experimental-vm-modules jest",
"prepare": "pnpm run build"
},
"license": "AGPL-3.0",
"homepage": "https://needle.maigo.dev",
"repository": {
"type": "git",
"url": "git+https://github.com/MaigoLabs/needLe.git",
"directory": "packages/needle"
},
"bugs": "https://github.com/MaigoLabs/needLe/issues",
"keywords": [
"needle",
"search",
"fuzzy",
"cjk",
"chinese",
"japanese",
"pinyin",
"romaji"
],
"author": "Menci <mencici@msn.com>",
"sideEffects": false,
"exports": {
".": "./src/index.ts",
"./common": "./src/common/index.ts",
"./indexer": "./src/indexer/index.ts",
"./searcher": "./src/searcher/index.ts",
"./package.json": "./package.json"
},
"packageManager": "pnpm@10.20.0",
"dependencies": {
"@patdx/kuromoji": "^1.0.4",
"hepburn": "^1.2.2",
"opencc-js": "^1.0.5",
"pinyin-pro": "^3.27.0"
},
"devDependencies": {
"@types/hepburn": "^1.2.2",
"@types/jest": "^30.0.0",
"@types/opencc-js": "^1.0.3",
"jest": "^30.2.0",
"ts-jest": "^29.4.6"
},
"files": [
"README.md",
"dist",
"package.json"
],
"publishConfig": {
"access": "public",
"main": "./dist/index.mjs",
"module": "./dist/index.mjs",
"types": "./dist/index.d.mts",
"exports": {
".": {
"types": "./dist/index.d.mts",
"default": "./dist/index.mjs"
},
"./common": {
"types": "./dist/common/index.d.mts",
"default": "./dist/common/index.mjs"
},
"./indexer": {
"types": "./dist/indexer/index.d.mts",
"default": "./dist/indexer/index.mjs"
},
"./searcher": {
"types": "./dist/searcher/index.d.mts",
"default": "./dist/searcher/index.mjs"
},
"./package.json": "./package.json"
}
}
}
+4
View File
@@ -0,0 +1,4 @@
export * from './types';
export * from './utils';
export * from './normalize';
export * from './trie';
@@ -0,0 +1,60 @@
import { normalizeByCodePoint, toKatakana } from './normalize';
describe('toKatakana', () => {
it('should convert hiragana to katakana', () => {
expect(toKatakana('あいうえお')).toBe('アイウエオ');
expect(toKatakana('かきくけこ')).toBe('カキクケコ');
expect(toKatakana('さしすせそ')).toBe('サシスセソ');
});
it('should keep katakana unchanged', () => {
expect(toKatakana('アイウエオ')).toBe('アイウエオ');
});
it('should keep non-kana characters unchanged', () => {
expect(toKatakana('abc123')).toBe('abc123');
expect(toKatakana('漢字')).toBe('漢字');
});
it('should handle mixed input', () => {
expect(toKatakana('あアa漢')).toBe('アアa漢');
});
});
describe('normalizeByCodePoint', () => {
it('should convert fullwidth ASCII to halfwidth lowercase', () => {
expect(normalizeByCodePoint('ABC')).toBe('abc');
expect(normalizeByCodePoint('123')).toBe('123');
expect(normalizeByCodePoint('!@#')).toBe('!@#');
});
it('should convert fullwidth space to halfwidth space', () => {
expect(normalizeByCodePoint(' ')).toBe(' ');
});
it('should convert halfwidth kana to fullwidth kana', () => {
expect(normalizeByCodePoint('アイウエオ')).toBe('アイウエオ');
expect(normalizeByCodePoint('カキクケコ')).toBe('カキクケコ');
});
it('should normalize voiced/semi-voiced sound marks', () => {
expect(normalizeByCodePoint('゙')).toBe('\u3099'); // halfwidth voiced -> combining
expect(normalizeByCodePoint('゚')).toBe('\u309A'); // halfwidth semi-voiced -> combining
expect(normalizeByCodePoint('゛')).toBe('\u3099'); // fullwidth voiced -> combining
expect(normalizeByCodePoint('゜')).toBe('\u309A'); // fullwidth semi-voiced -> combining
});
it('should convert halfwidth punctuation to fullwidth', () => {
expect(normalizeByCodePoint('。')).toBe('。');
expect(normalizeByCodePoint('「')).toBe('「');
expect(normalizeByCodePoint('」')).toBe('」');
expect(normalizeByCodePoint('、')).toBe('、');
expect(normalizeByCodePoint('・')).toBe('・');
});
it('should lowercase regular ASCII', () => {
expect(normalizeByCodePoint('ABC')).toBe('abc');
});
// Should keep hiragana unchanged
});
+42
View File
@@ -0,0 +1,42 @@
export const normalizeByCodePoint = (string: string) => [...string].map(normalizeCodePoint).join('');
export const normalizeCodePoint = (char: string) => {
const codePoint = char.codePointAt(0)!;
// Fullwidth ASCII -> Halfwidth ASCII
if (codePoint >= 0xFF01 && codePoint <= 0xFF5E) return String.fromCodePoint(codePoint - 0xFEE0).toLowerCase();
// Fullwidth space -> Halfwidth space
else if (codePoint === /* ' ' */ 0x3000) return ' ';
// Halfwidth kana (U+FF66 - U+FF9D) -> Fullwidth kana
else if (codePoint >= 0xFF66 && codePoint <= 0xFF9D) return HALF_TO_FULL_KANA[char] ?? char;
else if (codePoint === /* '。' */ 0xFF61) return '。';
else if (codePoint === /* '「' */ 0xFF62) return '「';
else if (codePoint === /* '」' */ 0xFF63) return '」';
else if (codePoint === /* '、' */ 0xFF64) return '、';
else if (codePoint === /* '・' */ 0xFF65) return '・';
else if (codePoint === /* '゙' */ 0xFF9E || codePoint === /* '゛' */ 0x309B) return '\u3099'; // -> COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
else if (codePoint === /* '゚' */ 0xFF9F || codePoint === /* '゜' */ 0x309C) return '\u309A'; // -> COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
else return char.toLowerCase();
};
const HALF_TO_FULL_KANA: Record<string, string> = {
'ヲ': 'ヲ', 'ァ': 'ァ', 'ィ': 'ィ', 'ゥ': 'ゥ', 'ェ': 'ェ', 'ォ': 'ォ',
'ャ': 'ャ', 'ュ': 'ュ', 'ョ': 'ョ', 'ッ': 'ッ',
'ー': 'ー',
'ア': 'ア', 'イ': 'イ', 'ウ': 'ウ', 'エ': 'エ', 'オ': 'オ',
'カ': 'カ', 'キ': 'キ', 'ク': 'ク', 'ケ': 'ケ', 'コ': 'コ',
'サ': 'サ', 'シ': 'シ', 'ス': 'ス', 'セ': 'セ', 'ソ': 'ソ',
'タ': 'タ', 'チ': 'チ', 'ツ': 'ツ', 'テ': 'テ', 'ト': 'ト',
'ﾅ': 'ナ', 'ﾆ': 'ニ', 'ﾇ': 'ヌ', 'ﾈ': 'ネ', 'ﾉ': 'ノ',
'ハ': 'ハ', 'ヒ': 'ヒ', 'フ': 'フ', 'ヘ': 'ヘ', 'ホ': 'ホ',
'マ': 'マ', 'ミ': 'ミ', 'ム': 'ム', 'メ': 'メ', 'モ': 'モ',
'ヤ': 'ヤ', 'ユ': 'ユ', 'ヨ': 'ヨ',
'ラ': 'ラ', 'リ': 'リ', 'ル': 'ル', 'レ': 'レ', 'ロ': 'ロ',
'ワ': 'ワ', 'ン': 'ン',
};
const isHiraganaRange = (charCode: number) => (charCode >= 0x3041 && charCode <= 0x3096) || (charCode >= 0x309D && charCode <= 0x309E);
export const toKatakanaSingle = (char: string) => {
const code = char.charCodeAt(0);
return isHiraganaRange(code) ? String.fromCharCode(code + 0x60) : char;
};
export const toKatakana = (string: string) => [...string].map(toKatakanaSingle).join('');
+17
View File
@@ -0,0 +1,17 @@
export interface TrieNode {
parent: TrieNode | undefined;
children: Map<number, TrieNode>; // Unicode code point -> child node
tokenIds: number[];
subTreeTokenIds: number[]; // Empty on root. Will Uint16Array be faster?
}
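// Step to the child for `codePoint`; if there is no such child but the code point matches `ignorableCodePoints`, stay on the current node (otherwise returns undefined).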
export const traverseTrieStep = (node: TrieNode | undefined, codePoint: string, ignorableCodePoints?: RegExp) =>
node?.children.get(codePoint.codePointAt(0)!) ?? (ignorableCodePoints?.test(codePoint) ? node : undefined);
export const traverseTrie = (node: TrieNode | undefined, text: string, ignorableCodePoints?: RegExp) => {
if (!node) return;
for (const codePoint of text) {
node = traverseTrieStep(node, codePoint, ignorableCodePoints);
if (!node) return;
}
return node;
};
+31
View File
@@ -0,0 +1,31 @@
export enum TokenType {
Raw,
Kana,
Romaji,
Han,
Pinyin,
}
export interface TokenDefinition {
id: number;
type: TokenType;
text: string;
codePointLength: number;
}
// [start, end)
export interface OffsetSpan {
start: number;
end: number;
}
export type CompressedInvertedIndex = {
documents: string[];
tokenTypes: TokenType[];
tokenReferences: number[][][]; // tokenId -> [documentId, start1, end1, start2, end2, ...][]
tries: {
romaji: number[];
kana: number[];
other: number[];
};
};
+3
View File
@@ -0,0 +1,3 @@
import type { OffsetSpan } from './types';
export const getSpanLength = (offset: OffsetSpan) => offset.end - offset.start;
+73
View File
@@ -0,0 +1,73 @@
import path from 'node:path';
import url from 'node:url';
import { TokenizerBuilder } from '@patdx/kuromoji';
import NodeDictionaryLoader from '@patdx/kuromoji/node';
import { buildInvertedIndex, type KuromojiTokenizer } from '../indexer';
import { highlightSearchResult, loadInvertedIndex, searchInvertedIndex } from '../searcher';
let kuromoji: KuromojiTokenizer;
beforeAll(async () => {
const kuromojiDictPath = path.resolve(url.fileURLToPath(import.meta.resolve('@patdx/kuromoji')), '..', '..', 'dict');
kuromoji = await new TokenizerBuilder({ loader: new NodeDictionaryLoader({ dic_path: kuromojiDictPath }) }).build();
});
describe('search', () => {
const testDocuments = [
'ミーティア',
'エンドマークに希望と涙を添えて',
'宵の鳥',
'僕の和風本当上手',
];
it('should match with mixed search query', () => {
const compressed = buildInvertedIndex(testDocuments, { kuromoji });
const invertedIndex = loadInvertedIndex(compressed);
const results = searchInvertedIndex(invertedIndex, 'bokunoh风じょう');
// Should have at least one result
expect(results.length).toBeGreaterThan(0);
// The first result should be "僕の和風本当上手"
expect(results[0]!.documentText).toBe('僕の和風本当上手');
});
it('should highlight search result correctly', () => {
const compressed = buildInvertedIndex(testDocuments, { kuromoji });
const invertedIndex = loadInvertedIndex(compressed);
const results = searchInvertedIndex(invertedIndex, 'bokunoh风じょう');
expect(results.length).toBeGreaterThan(0);
const highlighted = highlightSearchResult(results[0]!);
// Should be an array of parts
expect(Array.isArray(highlighted)).toBe(true);
expect(highlighted.length).toBeGreaterThan(0);
// Collect highlighted text
const highlightedTexts = highlighted
.filter((part): part is { highlight: string } => typeof part !== 'string')
.map(part => part.highlight);
expect(highlightedTexts.some(text => text.includes('僕'))).toBe(true);
expect(highlightedTexts.some(text => text.includes('の'))).toBe(true);
expect(highlightedTexts.some(text => text.includes('和'))).toBe(true);
expect(highlightedTexts.some(text => text.includes('風'))).toBe(true);
expect(highlightedTexts.some(text => text.includes('上'))).toBe(true);
});
it('should match romaji input to kana documents', () => {
const compressed = buildInvertedIndex(testDocuments, { kuromoji });
const invertedIndex = loadInvertedIndex(compressed);
// Search for "yoi" should match "宵の鳥"
const results = searchInvertedIndex(invertedIndex, 'yoi');
const matchedTexts = results.map(r => r.documentText);
expect(matchedTexts).toContain('宵の鳥');
});
});
+111
View File
@@ -0,0 +1,111 @@
import { traverseTrie } from '../common';
import { buildTrie, serializeTrie } from '../indexer/trie';
import { deserializeTrie } from '../searcher/trie';
describe('Trie building', () => {
it('should build a Trie with multiple different tokens', () => {
const trie = buildTrie([
[0, 'hello'],
[1, 'help'],
[2, 'world'],
[3, 'word'],
]);
// Traverse to verify structure
const helloNode = traverseTrie(trie, 'hello');
const helpNode = traverseTrie(trie, 'help');
const worldNode = traverseTrie(trie, 'world');
const wordNode = traverseTrie(trie, 'word');
expect(helloNode).toBeDefined();
expect(helpNode).toBeDefined();
expect(worldNode).toBeDefined();
expect(wordNode).toBeDefined();
// Check token IDs
expect(helloNode!.tokenIds).toContain(0);
expect(helpNode!.tokenIds).toContain(1);
expect(worldNode!.tokenIds).toContain(2);
expect(wordNode!.tokenIds).toContain(3);
// Check that 'hel' prefix node has both tokens in subTree
const helNode = traverseTrie(trie, 'hel');
expect(helNode).toBeDefined();
expect(helNode!.subTreeTokenIds).toContain(0);
expect(helNode!.subTreeTokenIds).toContain(1);
});
it('should handle Japanese text tokens', () => {
const trie = buildTrie([
[0, 'さくら'],
[1, 'サクラ'],
[2, '桜'],
]);
expect(traverseTrie(trie, 'さくら')?.tokenIds).toContain(0);
expect(traverseTrie(trie, 'サクラ')?.tokenIds).toContain(1);
expect(traverseTrie(trie, '桜')?.tokenIds).toContain(2);
});
});
describe('Trie serialization and deserialization', () => {
it('should serialize and deserialize a Trie correctly', () => {
const originalTrie = buildTrie([
[0, 'apple'],
[1, 'app'],
[2, 'banana'],
]);
// Serialize
const serialized = serializeTrie(originalTrie);
expect(Array.isArray(serialized)).toBe(true);
expect(serialized.length).toBeGreaterThan(0);
// Deserialize
const { root: deserializedTrie, tokenCodePoints } = deserializeTrie(serialized);
// Verify structure is preserved
const appleNode = traverseTrie(deserializedTrie, 'apple');
const appNode = traverseTrie(deserializedTrie, 'app');
const bananaNode = traverseTrie(deserializedTrie, 'banana');
expect(appleNode).toBeDefined();
expect(appNode).toBeDefined();
expect(bananaNode).toBeDefined();
expect(appleNode!.tokenIds).toContain(0);
expect(appNode!.tokenIds).toContain(1);
expect(bananaNode!.tokenIds).toContain(2);
// Verify tokenCodePoints map
expect(tokenCodePoints.get(0)?.join('')).toBe('apple');
expect(tokenCodePoints.get(1)?.join('')).toBe('app');
expect(tokenCodePoints.get(2)?.join('')).toBe('banana');
// Verify subTreeTokenIds are reconstructed
expect(appNode!.subTreeTokenIds).toContain(0);
expect(appNode!.subTreeTokenIds).toContain(1);
});
it('should preserve parent references after deserialization', () => {
const originalTrie = buildTrie([
[0, 'test'],
]);
const serialized = serializeTrie(originalTrie);
const { root } = deserializeTrie(serialized);
const testNode = traverseTrie(root, 'test');
expect(testNode).toBeDefined();
// Walk back to root via parent references
let node = testNode;
let depth = 0;
while (node?.parent) {
node = node.parent;
depth++;
}
expect(depth).toBe(4); // four parent hops: 't' -> 's' -> 'e' -> 't' -> root
expect(node).toBe(root);
});
});
+3
View File
@@ -0,0 +1,3 @@
export * from './common';
export * from './indexer';
export * from './searcher';
+103
View File
@@ -0,0 +1,103 @@
import { getHanVariants, getPinyinCandidates, isHanCharacter, unionFindSet } from './han';
describe('unionFindSet', () => {
it('should find self as root initially', () => {
const ufs = unionFindSet<number>();
expect(ufs.find(1)).toBe(1);
expect(ufs.find(2)).toBe(2);
});
it('should union two elements', () => {
const ufs = unionFindSet<number>();
ufs.union(1, 2);
expect(ufs.find(1)).toBe(ufs.find(2));
});
it('should union multiple elements transitively', () => {
const ufs = unionFindSet<number>();
ufs.union(1, 2);
ufs.union(2, 3);
ufs.union(4, 5);
expect(ufs.find(1)).toBe(ufs.find(3));
expect(ufs.find(1)).not.toBe(ufs.find(4));
ufs.union(3, 4);
expect(ufs.find(1)).toBe(ufs.find(5));
});
it('should iterate all keys', () => {
const ufs = unionFindSet<string>();
ufs.union('a', 'b');
ufs.union('c', 'd');
const keys = [...ufs.keys()];
expect(keys).toContain('a');
expect(keys).toContain('b');
expect(keys).toContain('c');
expect(keys).toContain('d');
});
});
describe('isHanCharacter', () => {
it('should return true for CJK characters', () => {
expect(isHanCharacter('中')).toBe(true);
expect(isHanCharacter('国')).toBe(true);
expect(isHanCharacter('日')).toBe(true);
expect(isHanCharacter('本')).toBe(true);
});
it('should return false for non-CJK characters', () => {
expect(isHanCharacter('a')).toBe(false);
expect(isHanCharacter('あ')).toBe(false);
expect(isHanCharacter('ア')).toBe(false);
expect(isHanCharacter('1')).toBe(false);
});
});
describe('getHanVariants', () => {
it('should return variants for simplified/traditional characters', () => {
// 国 (simplified) and 國 (traditional) should be variants of each other
const variants1 = getHanVariants('国');
const variants2 = getHanVariants('國');
expect(variants1).toContain('国');
expect(variants1).toContain('國');
expect(variants2).toContain('国');
expect(variants2).toContain('國');
});
it('should return the character itself for characters without variants', () => {
const variants = getHanVariants('一');
expect(variants).toContain('一');
});
it('should return empty array for non-Han characters', () => {
expect(getHanVariants('a')).toEqual([]);
expect(getHanVariants('あ')).toEqual([]);
});
});
describe('getPinyinCandidates', () => {
it('should return pinyin for a Han character', () => {
const candidates = getPinyinCandidates('中');
expect(candidates).toContain('zhong');
expect(candidates).toContain('zh'); // initial
expect(candidates).toContain('z'); // first letter
});
it('should return multiple pinyin for polyphonic characters', () => {
// 行 can be "xing" or "hang"
const candidates = getPinyinCandidates('行');
expect(candidates).toContain('xing');
expect(candidates).toContain('hang');
});
it('should include fuzzy pinyin variants', () => {
// 风 is "feng", should also have fuzzy variant "fen"
const candidates = getPinyinCandidates('风');
expect(candidates).toContain('feng');
expect(candidates).toContain('fen'); // fuzzy: eng -> en
});
it('should return empty array for non-Han characters', () => {
expect(getPinyinCandidates('a')).toEqual([]);
expect(getPinyinCandidates('あ')).toEqual([]);
});
});
+85
View File
@@ -0,0 +1,85 @@
// @ts-expect-error No declaration file
import hkVariants from 'opencc-js/dict/HKVariants';
// @ts-expect-error No declaration file
import hkVariantsRev from 'opencc-js/dict/HKVariantsRev';
// @ts-expect-error No declaration file
import jpVariants from 'opencc-js/dict/JPVariants';
// @ts-expect-error No declaration file
import jpVariantsRev from 'opencc-js/dict/JPVariantsRev';
// @ts-expect-error No declaration file
import stCharacters from 'opencc-js/dict/STCharacters';
// @ts-expect-error No declaration file
import tsCharacters from 'opencc-js/dict/TSCharacters';
// @ts-expect-error No declaration file
import twVariants from 'opencc-js/dict/TWVariants';
// @ts-expect-error No declaration file
import twVariantsRev from 'opencc-js/dict/TWVariantsRev';
import { polyphonic } from 'pinyin-pro';
export const unionFindSet = <T>() => {
const parent = new Map<T, T>();
const rank = new Map<T, number>();
const find = (x: T): T => {
const p = parent.get(x);
if (p == null) {
parent.set(x, x);
return x;
} else if (p === x) return x;
else {
const root = find(p);
parent.set(x, root);
return root;
}
};
const union = (x: T, y: T) => {
x = find(x);
y = find(y);
if (x === y) return;
const rankX = rank.get(x) ?? 0, rankY = rank.get(y) ?? 0;
if (rankX < rankY) parent.set(x, y);
else if (rankX > rankY) parent.set(y, x);
else {
parent.set(y, x);
rank.set(x, rankX + 1);
}
};
const keys = () => parent.keys();
return { find, union, keys };
};
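// Usage sketch (illustrative): after ufs.union('国', '國'), ufs.find('国') === ufs.find('國').
// The union-find is used below to group every character that any OpenCC dictionary maps to or
// from, so exchangeMap ends up mapping each Han character to the sorted list of all variants in
// its group (the list object is shared by the whole group).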
const exchangeMap = (() => {
const ufs = unionFindSet<string>();
for (const dict of [hkVariants, hkVariantsRev, jpVariants, jpVariantsRev, stCharacters, tsCharacters, twVariants, twVariantsRev] as string[]) {
for (const [from, to] of dict.split('|').map(pair => pair.split(' '))) {
if (!from || !to || [...from].length !== 1 || [...to].length !== 1) continue;
ufs.union(from, to);
}
}
const map = new Map<string, string[]>();
for (const key of ufs.keys()) {
const root = ufs.find(key);
let list = map.get(root);
if (!list) map.set(root, list = []);
if (key !== root) map.set(key, list);
list.push(key);
}
for (const list of map.values()) list.sort();
return map;
})();
export const isHanCharacter = (phrase: string) => /^[\p{Script=Han}]+$/u.test(phrase);
export const getHanVariants = (character: string) => exchangeMap.get(character) ?? (isHanCharacter(character) ? [character] : []);
const PINYIN_INITIALS: string[] = ['b', 'p', 'm', 'f', 'd', 't', 'n', 'l', 'g', 'k', 'h', 'j', 'q', 'x', 'zh', 'ch', 'sh', 'r', 'z', 'c', 's', 'y', 'w'];
const PINYIN_FINALS_FUZZY_MAP: Record<string, string> = { 'ang': 'an', 'eng': 'en', 'ing': 'in' };
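// For each reading, the candidates include the full pinyin, its initial, the bare first letter,
// and (when the final is ang/eng/ing) a fuzzy variant with the trailing 'g' dropped.
// Illustrative example (see the spec): getPinyinCandidates('中') includes 'zhong', 'zh' and 'z'.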
export const getPinyinCandidates = (character: string) => {
const pinyins = polyphonic(character, { type: 'array', toneType: 'none', removeNonZh: true })[0] ?? [];
return Array.from(new Set(pinyins.filter(fullPinyin => fullPinyin).flatMap(fullPinyin => {
const initial = PINYIN_INITIALS.find(initial => fullPinyin.startsWith(initial));
const initialAlphabet = initial?.[0] ?? fullPinyin[0]!;
const fuzzySuffix = fullPinyin.slice(-3);
const fuzzyPinyin = fuzzySuffix in PINYIN_FINALS_FUZZY_MAP ? fullPinyin.slice(0, -3) + PINYIN_FINALS_FUZZY_MAP[fuzzySuffix] : undefined;
return [fullPinyin, initial, initialAlphabet, fuzzyPinyin].filter((s): s is string => !!s);
})));
};
+5
View File
@@ -0,0 +1,5 @@
export * from './han';
export * from './japanese';
export * from './tokenizer';
export * from './trie';
export * from './inverted-index';
@@ -0,0 +1,46 @@
import { NORMALIZE_RULES_KANA_DAKUTEN, NORMALIZE_RULES_ROMAJI } from './japanese';
import { createTokenizer, type TokenizerOptions } from './tokenizer';
import { buildTrie, graftTriePaths, serializeTrie } from './trie';
import type { CompressedInvertedIndex, TokenDefinition } from '../common/types';
import { TokenType } from '../common/types';
const buildTypedTrie = (tokens: TokenDefinition[], typePredicate: (tokenType: TokenType) => boolean) =>
buildTrie(tokens.filter(token => typePredicate(token.type)).map(token => [token.id, token.text]));
export const buildInvertedIndex = (documents: string[], tokenizerOptions: TokenizerOptions) => {
const tokenizer = createTokenizer(tokenizerOptions);
const documentTokens = documents.map(document => tokenizer.tokenize(document));
const tokenDefinitions = [...tokenizer.tokens.values()];
const romajiRoot = buildTypedTrie(tokenDefinitions, type => type === TokenType.Romaji);
const kanaRoot = buildTypedTrie(tokenDefinitions, type => type === TokenType.Kana);
const otherRoot = buildTypedTrie(tokenDefinitions, type => type !== TokenType.Romaji && type !== TokenType.Kana);
graftTriePaths(romajiRoot, NORMALIZE_RULES_ROMAJI);
graftTriePaths(kanaRoot, NORMALIZE_RULES_KANA_DAKUTEN);
const invertedIndex: CompressedInvertedIndex = {
documents,
tokenTypes: tokenDefinitions.map(token => token.type),
tokenReferences: Array.from({ length: tokenDefinitions.length }, () => []),
tries: {
romaji: serializeTrie(romajiRoot),
kana: serializeTrie(kanaRoot),
other: serializeTrie(otherRoot),
},
};
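// Fill tokenReferences: each entry of tokenReferences[tokenId] is a flat array
// [documentId, start1, end1, start2, end2, ...] listing every occurrence of the token in that
// document (illustrative: occurrences [0, 1) and [3, 4) in document 2 become [2, 0, 1, 3, 4]).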
for (const [documentId, tokens] of documentTokens.entries()) {
const tokenOccurrences = new Map<number, number[]>();
for (const token of tokens) {
let occurrences = tokenOccurrences.get(token.id);
if (!occurrences) {
occurrences = [];
tokenOccurrences.set(token.id, occurrences);
}
occurrences.push(token.start, token.end);
}
for (const [tokenId, occurrences] of tokenOccurrences) {
invertedIndex.tokenReferences[tokenId]!.push([documentId, ...occurrences]);
}
}
return invertedIndex;
};
@@ -0,0 +1,66 @@
import path from 'node:path';
import url from 'node:url';
import { TokenizerBuilder } from '@patdx/kuromoji';
import NodeDictionaryLoader from '@patdx/kuromoji/node';
import { getAllKanaReadings, toRomajiStrictly } from './japanese';
import type { KuromojiTokenizer } from './tokenizer';
let kuromoji: KuromojiTokenizer;
beforeAll(async () => {
const kuromojiDictPath = path.resolve(url.fileURLToPath(import.meta.resolve('@patdx/kuromoji')), '..', '..', 'dict');
kuromoji = await new TokenizerBuilder({ loader: new NodeDictionaryLoader({ dic_path: kuromojiDictPath }) }).build();
});
describe('toRomajiStrictly', () => {
it('should convert basic kana to romaji', () => {
expect(toRomajiStrictly('あ')).toBe('a');
expect(toRomajiStrictly('か')).toBe('ka');
expect(toRomajiStrictly('さくら')).toBe('sakura');
});
it('should convert katakana to romaji', () => {
expect(toRomajiStrictly('ア')).toBe('a');
expect(toRomajiStrictly('カ')).toBe('ka');
expect(toRomajiStrictly('サクラ')).toBe('sakura');
});
it('should handle long vowels', () => {
expect(toRomajiStrictly('おう')).toBe('ou');
expect(toRomajiStrictly('おお')).toBe('oo');
});
it('should return empty string for invalid first character', () => {
expect(toRomajiStrictly('ー')).toBe(''); // prolonged sound mark cannot be first
expect(toRomajiStrictly('ゃ')).toBe(''); // small ya cannot be first
});
it('should return empty string for invalid last character', () => {
expect(toRomajiStrictly('っ')).toBe(''); // small tsu cannot be last
});
it('should handle gemination (small tsu)', () => {
expect(toRomajiStrictly('かった')).toBe('katta');
});
});
describe('getAllKanaReadings', () => {
it('should return katakana reading for pure kana input', () => {
const readings = getAllKanaReadings(kuromoji, 'あ');
expect(readings).toContain('ア');
});
it('should return readings for kanji', () => {
const readings = getAllKanaReadings(kuromoji, '僕');
expect(readings.length).toBeGreaterThan(0);
// 僕 should have reading ボク
expect(readings).toContain('ボク');
});
it('should return readings for compound words', () => {
const readings = getAllKanaReadings(kuromoji, '和風');
expect(readings.length).toBeGreaterThan(0);
});
});
+158
View File
@@ -0,0 +1,158 @@
import { fromKana } from 'hepburn';
import type { KuromojiTokenizer } from './tokenizer';
import { toKatakana } from '../common';
// We have normalized all other sound marks to \u3099 and \u309A (combining katakana-hiragana voiced/semi-voiced sound marks)
export const isMaybeJapanese = (phrase: string) => /^[\p{Script=Han}\u3041-\u309F\u30A0-\u30FF\u3005\u3006\u30FC\u3099\u309A]+$/u.test(phrase);
// See also normalize.ts
export const isJapaneseSoundMark = (phrase: string) => /^[\u3099\u309A]+$/.test(phrase);
export const stripJapaneseSoundMarks = (phrase: string) => phrase.replaceAll('\u3099', '').replaceAll('\u309A', '');
export const isKanaSingle = (char: string) => {
const code = char.charCodeAt(0);
return (code >= 0x3041 && code <= 0x309F) || (code >= 0x30A0 && code <= 0x30FF);
};
export const isKana = (phrase: string) => [...phrase].every(isKanaSingle);
const KANAS_CANNOT_BE_FIRST = [
'ァ', 'ィ', 'ゥ', 'ェ', 'ォ',
'ぁ', 'ぃ', 'ぅ', 'ぇ', 'ぉ',
'ャ', 'ュ', 'ョ',
'ゃ', 'ゅ', 'ょ',
'ヮ', 'ゎ',
'ㇰ', 'ㇱ', 'ㇲ', 'ㇳ', 'ㇴ', 'ㇵ', 'ㇶ', 'ㇷ', 'ㇸ', 'ㇹ', 'ㇺ', 'ㇻ', 'ㇼ', 'ㇽ', 'ㇾ', 'ㇿ',
'ー',
];
const KANAS_CANNOT_BE_LAST = [
'ッ', 'っ',
];
export const toRomajiStrictly = (kana: string) => {
if (KANAS_CANNOT_BE_FIRST.includes(kana[0]!)) return '';
if (KANAS_CANNOT_BE_LAST.includes(kana[kana.length - 1]!)) return '';
const romaji = fromKana(kana).toLowerCase()
.replaceAll('ā', 'aa')
.replaceAll('ī', 'ii')
.replaceAll('ū', 'uu')
.replaceAll('ē', 'ee')
.replaceAll('ō', 'ou');
if (!romaji.match(/^[a-z]+$/)) return '';
return romaji;
};
export const createTranscriptionEnumerator = (
isValidPhrase: (codePoints: string[], start: number, length: number) => boolean,
getAllTranscriptions: (phrase: string) => string[],
) => (codePoints: string[]) => {
const toKey = (start: number, length: number) => `${start}:${length}`;
const resultMap = new Map<string, { start: number; length: number; transcriptions: string[] }>();
for (let phraseLength = 1; phraseLength <= codePoints.length; phraseLength++) for (let start = 0; start + phraseLength <= codePoints.length; start++) {
if (!isValidPhrase(codePoints, start, phraseLength)) continue;
const phrase = codePoints.slice(start, start + phraseLength).join('');
const atomicTranscriptions = [...new Set(getAllTranscriptions(phrase))].filter(candidateTranscription => {
if (!candidateTranscription) return false;
// Ensure the transcription is atomic: reject it if the phrase can be split at some midpoints such that the pieces' shorter transcriptions concatenate to exactly this candidate
type State = { phrasePosition: number; transcriptionPosition: number };
const toStateKey = (state: State) => `${state.phrasePosition}:${state.transcriptionPosition}`;
const visitedStates = new Set<string>();
const queue: State[] = [{ phrasePosition: 0, transcriptionPosition: 0 }];
while (queue.length > 0) {
const { phrasePosition, transcriptionPosition } = queue.shift()!;
for (let prefixLength = 1; prefixLength <= phraseLength - phrasePosition; prefixLength++) {
const prefixResult = resultMap.get(toKey(start + phrasePosition, prefixLength));
if (!prefixResult) continue;
for (const transcription of prefixResult.transcriptions) {
if (candidateTranscription.slice(transcriptionPosition, transcriptionPosition + transcription.length) === transcription) {
const nextState: State = { phrasePosition: phrasePosition + prefixLength, transcriptionPosition: transcriptionPosition + transcription.length };
if (nextState.phrasePosition === phraseLength && nextState.transcriptionPosition === candidateTranscription.length) return false; // Found a valid combination
if (visitedStates.has(toStateKey(nextState))) continue;
visitedStates.add(toStateKey(nextState));
queue.push(nextState);
}
}
}
}
return true;
});
if (atomicTranscriptions.length > 0) resultMap.set(toKey(start, phraseLength), { start, length: phraseLength, transcriptions: atomicTranscriptions });
}
return [...resultMap.values()];
};
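// Collect all katakana readings of a phrase: pure-kana input always contributes its own katakana
// form, and (except for a single kana character) every reading the kuromoji dictionary has for
// the exact phrase is added as well. Note this reaches into @patdx/kuromoji internals
// (token_info_dictionary / viterbi_builder) rather than any documented API.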
export const getAllKanaReadings = (kuromoji: KuromojiTokenizer, phrase: string) => Array.from(new Set(
[
...isKana(phrase) ? [toKatakana(phrase)] : [],
...isKana(phrase) && [...phrase].length === 1 ? [] : ((kuromoji.token_info_dictionary.target_map[kuromoji.viterbi_builder.trie.lookup(phrase)] ?? [])
.map(id => kuromoji.formatter.formatEntry(
id, 0, 'KNOWN',
kuromoji.token_info_dictionary.getFeatures(id as unknown as string)?.split(',') ?? [],
).reading)
.filter((reading): reading is string => !!reading))
.map(toKatakana),
],
));
const createNormalizer = (rules: Record<string, string>) => (text: string) => {
while (true) {
const beforeCurrentIteration = text;
for (const [from, to] of Object.entries(rules)) text = text.replaceAll(from, to);
if (text === beforeCurrentIteration) break;
}
return text;
};
export const NORMALIZE_RULES_ROMAJI: Record<string, string> = {
// Remove long vowel marks (sa-ba- -> saba)
'-': '',
// Collapse consecutive vowels
'aa': 'a',
'ii': 'i',
'uu': 'u',
'ee': 'e',
'oo': 'o',
'ou': 'o',
// mb/mp/mm -> nb/np/nm (shimbun -> shinbun)
'mb': 'nb',
'mp': 'np',
'mm': 'nm',
// Others
'sha': 'sya',
'tsu': 'tu',
'chi': 'ti',
'shi': 'si',
'ji': 'zi',
};
export const normalizeRomaji = createNormalizer(NORMALIZE_RULES_ROMAJI);
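// Illustrative examples: normalizeRomaji('kyou') === 'kyo' and normalizeRomaji('tou') === 'to'
// ('ou' collapses to 'o'); 'shimbun' should normalize to 'sinbun' ('mb' -> 'nb', 'shi' -> 'si').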
export const NORMALIZE_RULES_KANA_DAKUTEN: Record<string, string> = {
'う\u3099': 'ゔ',
'か\u3099': 'が', 'き\u3099': 'ぎ', 'く\u3099': 'ぐ', 'け\u3099': 'げ', 'こ\u3099': 'ご',
'さ\u3099': 'ざ', 'し\u3099': 'じ', 'す\u3099': 'ず', 'せ\u3099': 'ぜ', 'そ\u3099': 'ぞ',
'た\u3099': 'だ', 'ち\u3099': 'ぢ', 'つ\u3099': 'づ', 'て\u3099': 'で', 'と\u3099': 'ど',
'は\u3099': 'ば', 'ひ\u3099': 'び', 'ふ\u3099': 'ぶ', 'へ\u3099': 'べ', 'ほ\u3099': 'ぼ',
'は\u309A': 'ぱ', 'ひ\u309A': 'ぴ', 'ふ\u309A': 'ぷ', 'へ\u309A': 'ぺ', 'ほ\u309A': 'ぽ',
'ゝ\u3099': 'ゞ',
'ウ\u3099': 'ヴ',
'カ\u3099': 'ガ', 'キ\u3099': 'ギ', 'ク\u3099': 'グ', 'ケ\u3099': 'ゲ', 'コ\u3099': 'ゴ',
'サ\u3099': 'ザ', 'シ\u3099': 'ジ', 'ス\u3099': 'ズ', 'セ\u3099': 'ゼ', 'ソ\u3099': 'ゾ',
'タ\u3099': 'ダ', 'チ\u3099': 'ヂ', 'ツ\u3099': 'ヅ', 'テ\u3099': 'デ', 'ト\u3099': 'ド',
'ハ\u3099': 'バ', 'ヒ\u3099': 'ビ', 'フ\u3099': 'ブ', 'ヘ\u3099': 'ベ', 'ホ\u3099': 'ボ',
'ハ\u309A': 'パ', 'ヒ\u309A': 'ピ', 'フ\u309A': 'プ', 'ヘ\u309A': 'ペ', 'ホ\u309A': 'ポ',
'ワ\u3099': 'ヷ', 'ヰ\u3099': 'ヸ', 'ヱ\u3099': 'ヹ', 'ヲ\u3099': 'ヺ',
'ヽ\u3099': 'ヾ',
};
export const normalizeKanaDakuten = createNormalizer(NORMALIZE_RULES_KANA_DAKUTEN);
const isValidJapanesePhrase = (codePoints: string[], start: number, length: number) =>
// Skip splittings that cause sound marks to occur in the first position of a phrase
!isJapaneseSoundMark(codePoints[start]!) && (start + length === codePoints.length || !isJapaneseSoundMark(codePoints[start + length]!));
export const createKanaTranscriptionEnumerator = (kuromoji: KuromojiTokenizer) => createTranscriptionEnumerator(
isValidJapanesePhrase,
phrase => getAllKanaReadings(kuromoji, stripJapaneseSoundMarks(normalizeKanaDakuten(phrase))),
);
export const createRomajiTranscriptionEnumerator = (kuromoji: KuromojiTokenizer) => createTranscriptionEnumerator(
isValidJapanesePhrase,
phrase => getAllKanaReadings(kuromoji, stripJapaneseSoundMarks(normalizeKanaDakuten(phrase))).map(kana => normalizeRomaji(toRomajiStrictly(kana))),
);
@@ -0,0 +1,166 @@
import path from 'node:path';
import url from 'node:url';
import { TokenizerBuilder } from '@patdx/kuromoji';
import NodeDictionaryLoader from '@patdx/kuromoji/node';
import { createTokenizer, type KuromojiTokenizer } from './tokenizer';
import { TokenType } from '../common/types';
let kuromoji: KuromojiTokenizer;
beforeAll(async () => {
const kuromojiDictPath = path.resolve(url.fileURLToPath(import.meta.resolve('@patdx/kuromoji')), '..', '..', 'dict');
kuromoji = await new TokenizerBuilder({ loader: new NodeDictionaryLoader({ dic_path: kuromojiDictPath }) }).build();
});
describe('tokenizer', () => {
it('should tokenize mixed Japanese text', () => {
const tokenizer = createTokenizer({ kuromoji });
const tokens = tokenizer.tokenize('僕の和風本当上手');
// Get all token definitions
const tokenDefs = [...tokenizer.tokens.values()];
// Should have tokens of various types
const types = new Set(tokenDefs.map(t => t.type));
expect(types.has(TokenType.Han)).toBe(true);
expect(types.has(TokenType.Pinyin)).toBe(true);
expect(types.has(TokenType.Kana)).toBe(true);
expect(types.has(TokenType.Romaji)).toBe(true);
const getTokenTextsAt = (pos: number, type: TokenType) => tokens
.filter(t => t.start <= pos && t.end > pos && tokenDefs.find(d => d.id === t.id)?.type === type)
.map(t => tokenDefs.find(d => d.id === t.id)!.text);
// Position 0: 僕
expect(getTokenTextsAt(0, TokenType.Han)).toContain('僕');
expect(getTokenTextsAt(0, TokenType.Pinyin)).toContain('pu');
expect(getTokenTextsAt(0, TokenType.Kana)).toContain('ボク');
expect(getTokenTextsAt(0, TokenType.Romaji)).toContain('boku');
// Position 1: の (hiragana, no Han/Pinyin)
expect(getTokenTextsAt(1, TokenType.Han)).toEqual([]);
expect(getTokenTextsAt(1, TokenType.Pinyin)).toEqual([]);
expect(getTokenTextsAt(1, TokenType.Kana)).toContain('ノ');
expect(getTokenTextsAt(1, TokenType.Romaji)).toContain('no');
// Position 2: 和
expect(getTokenTextsAt(2, TokenType.Han)).toContain('和');
expect(getTokenTextsAt(2, TokenType.Pinyin)).toContain('he');
expect(getTokenTextsAt(2, TokenType.Kana)).toContain('ワ');
expect(getTokenTextsAt(2, TokenType.Romaji)).toContain('wa');
// Position 3: 風
expect(getTokenTextsAt(3, TokenType.Han)).toContain('風');
expect(getTokenTextsAt(3, TokenType.Han)).toContain('风'); // simplified variant
expect(getTokenTextsAt(3, TokenType.Pinyin)).toContain('feng');
expect(getTokenTextsAt(3, TokenType.Kana)).toContain('フウ');
expect(getTokenTextsAt(3, TokenType.Romaji)).toContain('fu');
// Position 4: 本
expect(getTokenTextsAt(4, TokenType.Han)).toContain('本');
expect(getTokenTextsAt(4, TokenType.Pinyin)).toContain('ben');
expect(getTokenTextsAt(4, TokenType.Kana)).toContain('ホン');
expect(getTokenTextsAt(4, TokenType.Romaji)).toContain('hon');
// Position 5: 当
expect(getTokenTextsAt(5, TokenType.Han)).toContain('当');
expect(getTokenTextsAt(5, TokenType.Han)).toContain('當'); // traditional variant
expect(getTokenTextsAt(5, TokenType.Pinyin)).toContain('dang');
expect(getTokenTextsAt(5, TokenType.Kana)).toContain('トウ');
expect(getTokenTextsAt(5, TokenType.Romaji)).toContain('to'); // normalized: tou -> to
// Position 6: 上
expect(getTokenTextsAt(6, TokenType.Han)).toContain('上');
expect(getTokenTextsAt(6, TokenType.Pinyin)).toContain('shang');
expect(getTokenTextsAt(6, TokenType.Kana)).toContain('ジョウ');
expect(getTokenTextsAt(6, TokenType.Romaji)).toContain('jo'); // normalized: jou -> jo
// Position 7: 手
expect(getTokenTextsAt(7, TokenType.Han)).toContain('手');
expect(getTokenTextsAt(7, TokenType.Pinyin)).toContain('shou');
expect(getTokenTextsAt(7, TokenType.Kana)).toContain('シュ');
expect(getTokenTextsAt(7, TokenType.Romaji)).toContain('shu');
// Check that tokens cover the entire input
expect(tokens.length).toBeGreaterThan(0);
// Check some specific token definitions exist
const hanTokenTexts = tokenDefs.filter(t => t.type === TokenType.Han).map(t => t.text);
expect(hanTokenTexts).toContain('僕');
expect(hanTokenTexts).toContain('和');
expect(hanTokenTexts).toContain('風');
// Check kana readings exist for kanji
const kanaTokenTexts = tokenDefs.filter(t => t.type === TokenType.Kana).map(t => t.text);
expect(kanaTokenTexts).toContain('ボク'); // 僕 -> ボク
// Check romaji readings exist
const romajiTokenTexts = tokenDefs.filter(t => t.type === TokenType.Romaji).map(t => t.text);
expect(romajiTokenTexts).toContain('boku'); // 僕 -> boku
});
it('should not create duplicate tokens when tokenizing multiple documents', () => {
const tokenizer = createTokenizer({ kuromoji });
// Tokenize multiple music names that share some characters
tokenizer.tokenize('僕の和風本当上手');
tokenizer.tokenize('僕');
tokenizer.tokenize('和風');
// Check that there are no duplicate tokens
const tokenDefs = [...tokenizer.tokens.values()];
const tokenKeys = tokenDefs.map(t => `${t.type}:${t.text}`);
const uniqueKeys = new Set(tokenKeys);
expect(tokenKeys.length).toBe(uniqueKeys.size);
// Also check that IDs are unique
const ids = tokenDefs.map(t => t.id);
const uniqueIds = new Set(ids);
expect(ids.length).toBe(uniqueIds.size);
});
it('should handle Raw tokens for non-CJK characters', () => {
const tokenizer = createTokenizer({ kuromoji });
tokenizer.tokenize('a-b');
const tokenDefs = [...tokenizer.tokens.values()];
const rawTokenTexts = tokenDefs.filter(t => t.type === TokenType.Raw).map(t => t.text);
expect(rawTokenTexts).toContain('a'); // normalized to lowercase
expect(rawTokenTexts).toContain('-');
expect(rawTokenTexts).toContain('b');
});
it('should tokenize compound word "今日" with both individual and combined readings', () => {
const tokenizer = createTokenizer({ kuromoji });
const tokens = tokenizer.tokenize('今日');
const tokenDefs = [...tokenizer.tokens.values()];
const getTokensWithSpan = (type: TokenType, start: number, end: number) => tokens
.filter(t => t.start === start && t.end === end && tokenDefs.find(d => d.id === t.id)?.type === type)
.map(t => tokenDefs.find(d => d.id === t.id)!.text);
// Individual character readings at position 0: 今
expect(getTokensWithSpan(TokenType.Han, 0, 1)).toContain('今');
expect(getTokensWithSpan(TokenType.Pinyin, 0, 1)).toContain('jin');
expect(getTokensWithSpan(TokenType.Kana, 0, 1)).toContain('コン');
expect(getTokensWithSpan(TokenType.Kana, 0, 1)).toContain('イマ');
expect(getTokensWithSpan(TokenType.Romaji, 0, 1)).toContain('kon');
expect(getTokensWithSpan(TokenType.Romaji, 0, 1)).toContain('ima');
// Individual character readings at position 1: 日
expect(getTokensWithSpan(TokenType.Han, 1, 2)).toContain('日');
expect(getTokensWithSpan(TokenType.Pinyin, 1, 2)).toContain('ri');
expect(getTokensWithSpan(TokenType.Kana, 1, 2)).toContain('ニチ');
expect(getTokensWithSpan(TokenType.Kana, 1, 2)).toContain('ヒ');
expect(getTokensWithSpan(TokenType.Romaji, 1, 2)).toContain('niti');
expect(getTokensWithSpan(TokenType.Romaji, 1, 2)).toContain('hi');
// Combined reading for "今日" [0, 2] - this is an indivisible compound word
expect(getTokensWithSpan(TokenType.Kana, 0, 2)).toContain('キョウ');
expect(getTokensWithSpan(TokenType.Romaji, 0, 2)).toContain('kyo'); // normalized: kyou -> kyo
});
});
+93
View File
@@ -0,0 +1,93 @@
import type { TokenizerBuilder } from '@patdx/kuromoji';
import { getHanVariants, getPinyinCandidates } from './han';
import { createKanaTranscriptionEnumerator, createRomajiTranscriptionEnumerator, isMaybeJapanese } from './japanese';
import { normalizeByCodePoint } from '../common/normalize';
import { TokenType, type TokenDefinition } from '../common/types';
export interface Token {
id: number;
start: number;
end: number;
}
export type KuromojiTokenizer = Awaited<ReturnType<TokenizerBuilder['build']>>;
export interface TokenizerOptions {
kuromoji: KuromojiTokenizer;
}
export const createTokenizer = (options: TokenizerOptions) => {
const tokens = new Map<string, TokenDefinition>();
let nextId = 0;
const ensureToken = (type: TokenType, text: string) => {
const key = `${type}:${text}`;
let tokenDefinition = tokens.get(key);
if (tokenDefinition) return tokenDefinition;
tokenDefinition = { id: nextId++, type, text, codePointLength: [...text].length };
tokens.set(key, tokenDefinition);
return tokenDefinition;
};
const enumerateAllKanaCombinations = createKanaTranscriptionEnumerator(options.kuromoji);
const enumerateAllRomajiCombinations = createRomajiTranscriptionEnumerator(options.kuromoji);
const tokenize = (text: string) => {
const results: Token[] = [];
const emitter = (start: number, end: number) => (type: TokenType, text: string) => results.push({ id: ensureToken(type, text).id, start, end });
const emitMaybeJapanese = (codePoints: string[], offset: number) => {
for (const { start, length, transcriptions } of enumerateAllKanaCombinations(codePoints)) {
const emit = emitter(offset + start, offset + start + length);
for (const transcription of transcriptions) emit(TokenType.Kana, transcription);
}
for (const { start, length, transcriptions } of enumerateAllRomajiCombinations(codePoints)) {
const emit = emitter(offset + start, offset + start + length);
for (const transcription of transcriptions) emit(TokenType.Romaji, transcription);
}
for (let i = 0; i < codePoints.length; i++) {
// A single character may have not only kana readings but also Chinese pronunciations and Simplified/Traditional/Japanese variants.
const character = codePoints[i]!;
const hanAlternates = getHanVariants(character); // All possible variant characters (Simplified/Traditional/Japanese)
const pinyinAlternates = Array.from(new Set(hanAlternates.flatMap(han => getPinyinCandidates(han)))); // All possible pinyin candidates
const emit = emitter(offset + i, offset + i + 1);
for (const han of hanAlternates) emit(TokenType.Han, han);
for (const pinyin of pinyinAlternates) emit(TokenType.Pinyin, pinyin);
}
};
const emitRaw = (codePoint: string, offset: number) => emitter(offset, offset + 1)(TokenType.Raw, codePoint);
const codePoints = [...normalizeByCodePoint(text)];
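// Scan the normalized code points: consume maximal runs of possibly-Japanese characters and emit
// Kana/Romaji/Han/Pinyin tokens for them, skip whitespace, and turn anything else into a
// single-code-point Raw token.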
for (let start = 0; start < codePoints.length;) {
const codePoint = codePoints[start]!;
const consequentCharsets = [
{ is: isMaybeJapanese, emit: emitMaybeJapanese },
];
let emitted = false;
for (const { is, emit } of consequentCharsets) {
let length = 0;
while (start + length < codePoints.length && is(codePoints[start + length]!)) length++;
if (length > 0) {
emit(codePoints.slice(start, start + length), start);
start += length;
emitted = true;
break;
}
}
if (emitted) continue;
// Skip whitespaces
if (/\s/.test(codePoint)) {
start++;
continue;
}
emitRaw(codePoint, start);
start++;
}
return results;
};
return {
tokens,
tokenize,
};
};
+51
View File
@@ -0,0 +1,51 @@
import { traverseTrie } from '../common';
import { buildTrie, graftTriePaths } from './trie';
describe('graftTriePaths', () => {
it('should graft paths according to normalization rules', () => {
// Build a trie with tokens containing normalized forms
const trie = buildTrie([
[0, 'sya'], // normalized form of "sha"
[1, 'tu'], // normalized form of "tsu"
]);
// Graft paths so that "sha" -> "sya" and "tsu" -> "tu"
graftTriePaths(trie, {
sha: 'sya',
tsu: 'tu',
});
// Now we should be able to traverse using both the original and grafted paths
const syaNode = traverseTrie(trie, 'sya');
const shaNode = traverseTrie(trie, 'sha');
expect(syaNode).toBeDefined();
expect(shaNode).toBeDefined();
expect(syaNode).toBe(shaNode); // Both paths should lead to the same node
const tuNode = traverseTrie(trie, 'tu');
const tsuNode = traverseTrie(trie, 'tsu');
expect(tuNode).toBeDefined();
expect(tsuNode).toBeDefined();
expect(tuNode).toBe(tsuNode);
});
it('should handle chained graft rules', () => {
const trie = buildTrie([
[0, 'o'], // normalized vowel
]);
// Chain: "ou" -> "o", "oo" -> "o"
graftTriePaths(trie, {
ou: 'o',
oo: 'o',
});
const oNode = traverseTrie(trie, 'o');
const ouNode = traverseTrie(trie, 'ou');
const ooNode = traverseTrie(trie, 'oo');
expect(oNode).toBeDefined();
expect(ouNode).toBe(oNode);
expect(ooNode).toBe(oNode);
});
});
+115
View File
@@ -0,0 +1,115 @@
import { traverseTrie, type TrieNode } from '../common';
const newNode = (parent?: TrieNode): TrieNode => ({ parent, children: new Map(), tokenIds: [], subTreeTokenIds: [] });
// Assume tokens are unique.
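// Each node records the token IDs ending exactly at that node (tokenIds) as well as the token IDs
// of every token passing through or below it (subTreeTokenIds), so prefix lookups can read the
// latter directly.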
export const buildTrie = (tokens: [id: number, text: string][]) => {
const root = newNode(undefined);
for (const [id, text] of tokens) {
let node = root;
for (const char of text) {
const codePoint = char.codePointAt(0)!;
let childNode = node.children.get(codePoint);
if (!childNode) {
childNode = newNode(node);
node.children.set(codePoint, childNode);
}
node = childNode;
node.subTreeTokenIds.push(id);
}
node.tokenIds.push(id);
}
return root;
};
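// Graft alternative paths into the trie so that a non-normalized spelling (e.g. 'sha', 'tsu')
// walks to the same node as its normalized form ('sya', 'tu'). Rules may chain, so grafting is
// re-applied on newly created path nodes until no rule produces a change; a rule whose target is
// longer than its source is rejected up front to avoid infinite loops.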
export const graftTriePaths = (root: TrieNode, rules: Record<string, string>) => {
for (const [inputPhrase, graftTo] of Object.entries(rules)) if ([...graftTo].length > [...inputPhrase].length) throw new Error(`Graft rule ${inputPhrase} -> ${graftTo} maps to longer string and may cause infinite loop`);
const visitedNodes = new Set<TrieNode>();
const graftFromNode = (node: TrieNode, recursiveChildren: boolean) => {
if (visitedNodes.has(node)) return;
visitedNodes.add(node);
if (recursiveChildren) for (const [, childNode] of node.children) graftFromNode(childNode, true);
while (true) {
const nodesWithNewGraftedChildren = new Map<TrieNode, /* depth from initial node */ number>();
for (const [inputPhrase, graftTo] of Object.entries(rules)) {
const targetNode = traverseTrie(node, graftTo);
if (!targetNode) continue;
const codePoints = [...inputPhrase];
const graftedPath = Array.from<TrieNode>({ length: codePoints.length - 1 });
let isGrafted = false;
let currentNode = node;
for (let i = 0; i < codePoints.length; i++) {
const codePoint = codePoints[i]!.codePointAt(0)!;
let childNode = currentNode.children.get(codePoint);
if (i === codePoints.length - 1) {
if (childNode) {
if (childNode !== targetNode) throw new Error(`Grafted path ${inputPhrase} conflicts with existing path`);
// Already grafted
} else {
currentNode.children.set(codePoint, childNode = targetNode);
isGrafted = true;
}
} else {
if (!childNode) {
childNode = newNode(currentNode);
childNode.subTreeTokenIds = targetNode.subTreeTokenIds;
currentNode.children.set(codePoint, childNode);
} else {
// Part of another grafted path?
childNode.subTreeTokenIds = Array.from(new Set([...childNode.subTreeTokenIds, ...targetNode.subTreeTokenIds]));
}
graftedPath[i] = currentNode = childNode;
}
}
if (isGrafted) for (const [i, nodeToAdd] of graftedPath.entries()) nodesWithNewGraftedChildren.set(nodeToAdd, i + 1);
}
if (nodesWithNewGraftedChildren.size > 0) {
// Re-check graft rules on the newly grafted path
// 1. No need to recurse into other children (not on this path) since their children are not affected
// 2. No need to consider ancestors of this node since they're handled later (we run in DFS order)
const sortedNodes = [...nodesWithNewGraftedChildren.entries()].sort((a, b) => b[1] - a[1]);
for (const [changedNode] of sortedNodes) graftFromNode(changedNode, false);
} else {
// No new grafts applied
break;
}
}
};
graftFromNode(root, true);
};
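// Serialize the trie into a flat number array. Nodes are numbered from 1 (0 means "no parent");
// each node contributes [parent id, child code points..., child node ids..., then its token IDs
// encoded as -(tokenId + 1), or a single 0 when the node has no tokens]. deserializeTrie in the
// searcher reads this layout back.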
export const serializeTrie = (root: TrieNode) => {
const nodeEntries = new Map<TrieNode, {
id: number;
visited: boolean;
data?: number[];
}>();
let currentId = 0;
const getNodeEntry = (node: TrieNode) => {
let entry = nodeEntries.get(node);
if (!entry) {
entry = { id: ++currentId, visited: false };
nodeEntries.set(node, entry);
}
return entry;
};
const serializeNode = (node: TrieNode) => {
const entry = getNodeEntry(node);
if (entry.visited) return entry.id;
entry.visited = true;
const children = [...node.children.entries()].map(([codePoint, childNode]) => [codePoint, serializeNode(childNode)] as const);
entry.data = [
node.parent ? getNodeEntry(node.parent).id : 0,
...children.map(child => child[0]), // code points
...children.map(child => child[1]), // child node ids
// End of children list (<= 0 are not valid code points nor node IDs)
...node.tokenIds.length > 0
? node.tokenIds.map(tokenId => -(tokenId + 1)) // Use the negative value of (tokenId + 1)
: [0], // End of children list, no token IDs (token IDs are encoded to negative values)
];
return entry.id;
};
serializeNode(root);
return [...nodeEntries.values()].sort((a, b) => a.id - b.id).flatMap(node => node.data ?? []);
};
+26
View File
@@ -0,0 +1,26 @@
import { getSpanLength, TokenType } from '../common';
import type { SearchResult } from './search';
export type HighlightedTextPart = /* not highlighted */ string | /* highlighted */ { highlight: string };
export const highlightSearchResult = (resultDocument: SearchResult): HighlightedTextPart[] => {
const highlightResult: HighlightedTextPart[] = [];
let previousHighlightEnd = 0;
for (const token of resultDocument.tokens) {
const notHighlightedText = resultDocument.documentCodePoints.slice(previousHighlightEnd, token.documentOffset.start).join('');
if (notHighlightedText.length > 0) highlightResult.push(notHighlightedText);
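// When a kana token was only prefix-matched (the input covered just part of the token), highlight
// a proportional prefix of its document span (at least one code point) instead of the whole span.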
const highlightEnd = token.isTokenPrefixMatching && (token.definition.type === TokenType.Kana)
? token.documentOffset.start + Math.max(
1,
Math.round(
getSpanLength(token.documentOffset) *
Math.min(1, getSpanLength(token.inputOffset) / token.definition.codePointLength),
),
)
: token.documentOffset.end;
highlightResult.push({ highlight: resultDocument.documentCodePoints.slice(token.documentOffset.start, highlightEnd).join('') });
previousHighlightEnd = highlightEnd;
}
if (previousHighlightEnd < resultDocument.documentCodePoints.length) highlightResult.push(resultDocument.documentCodePoints.slice(previousHighlightEnd).join(''));
return highlightResult;
};
+4
View File
@@ -0,0 +1,4 @@
export * from './trie';
export * from './inverted-index';
export * from './search';
export * from './highlight';
@@ -0,0 +1,59 @@
import { deserializeTrie } from './trie';
import type { TrieNode } from '../common';
import type { CompressedInvertedIndex, OffsetSpan, TokenDefinition } from '../common/types';
export interface TokenDocumentReference {
documentId: number;
offsets: OffsetSpan[];
}
interface TokenDefinitionExtended extends TokenDefinition {
references: TokenDocumentReference[];
};
const mergeMap = <K, V>(...maps: Map<K, V>[]) => {
const result = new Map<K, V>();
for (const map of maps) for (const [key, value] of map.entries()) result.set(key, value);
return result;
};
export interface LoadedInvertedIndex {
documents: string[];
documentCodePoints: string[][];
tokenDefinitions: TokenDefinitionExtended[];
tries: {
romaji: TrieNode;
kana: TrieNode;
other: TrieNode;
};
}
export const loadInvertedIndex = (compressed: CompressedInvertedIndex): LoadedInvertedIndex => {
const documents = compressed.documents;
const documentCodePoints = documents.map(document => [...document]);
const romajiTrie = deserializeTrie(compressed.tries.romaji);
const kanaTrie = deserializeTrie(compressed.tries.kana);
const otherTrie = deserializeTrie(compressed.tries.other);
const tokenCodePoints = mergeMap(romajiTrie.tokenCodePoints, kanaTrie.tokenCodePoints, otherTrie.tokenCodePoints);
const tokenDefinitions = compressed.tokenTypes.map<TokenDefinitionExtended>((type, index) => ({
id: index, type, text: tokenCodePoints.get(index)!.join(''),
codePointLength: tokenCodePoints.get(index)!.length,
references: compressed.tokenReferences[index]!.map<TokenDocumentReference>(([documentId, ...offsets]) => ({
documentId: documentId!,
offsets: Array.from({ length: offsets.length / 2 }, (_, i) => ({ start: offsets[i * 2]!, end: offsets[i * 2 + 1]! })),
})),
}));
return {
documents,
documentCodePoints,
tokenDefinitions,
tries: {
romaji: romajiTrie.root,
kana: kanaTrie.root,
other: otherTrie.root,
},
};
};
+258
View File
@@ -0,0 +1,258 @@
import { highlightSearchResult } from './highlight';
import { getTrieNodeTokenIds } from './trie';
import type { TrieNode } from '../common';
import { traverseTrieStep } from '../common';
import type { LoadedInvertedIndex } from './inverted-index';
import { normalizeByCodePoint, toKatakana } from '../common/normalize';
import { type OffsetSpan, type TokenDefinition, TokenType } from '../common/types';
import { getSpanLength } from '../common/utils';
const IGNORABLE_CODE_POINTS = /[\s\u3099\u309A]/u;
enum TokenTypePrefixMatchingPolicy {
AlwaysAllow,
NeverAllow,
AllowOnlyAtInputEnd,
}
const tokenTypePrefixMatchingPolicy: Record<TokenType, TokenTypePrefixMatchingPolicy> = {
[TokenType.Romaji]: TokenTypePrefixMatchingPolicy.NeverAllow,
[TokenType.Kana]: TokenTypePrefixMatchingPolicy.AlwaysAllow,
// These token types are in an "other" Trie
[TokenType.Han]: TokenTypePrefixMatchingPolicy.AllowOnlyAtInputEnd, // No effect because always 1 code point
[TokenType.Pinyin]: TokenTypePrefixMatchingPolicy.AllowOnlyAtInputEnd,
[TokenType.Raw]: TokenTypePrefixMatchingPolicy.AllowOnlyAtInputEnd, // No effect because always 1 code point
};
const shouldAllowPrefixMatching = (tokenType: TokenType, isAtInputEnd: boolean) =>
tokenTypePrefixMatchingPolicy[tokenType] === TokenTypePrefixMatchingPolicy.AlwaysAllow ||
(tokenTypePrefixMatchingPolicy[tokenType] !== TokenTypePrefixMatchingPolicy.NeverAllow && isAtInputEnd);
export interface SearchResultToken {
definition: TokenDefinition;
documentOffset: OffsetSpan;
inputOffset: OffsetSpan;
isTokenPrefixMatching: boolean;
}
interface ComparableStateTraits<T> {
getRangeCount: (state: T) => number;
getPrefixMatchCount: (state: T) => number;
getFirstTokenDocumentOffset: (state: T) => OffsetSpan;
getLastTokenDocumentOffset: (state: T) => OffsetSpan;
getLastToken?: (state: T) => SearchResultToken; // Not on intermediate results
getMatchRatioLevel?: (state: T) => number; // Not on intermediate/candidate results
getMatchRatio: (state: T) => number;
// Called when all other comparisons are equal
nextComparer?: (a: T, b: T) => number; // Not on intermediate/candidate results
}
const getComparerForTraits = <T>(traits: ComparableStateTraits<T>) => (a: T, b: T) => {
// Prefer matches that not relying on end-of-input loose matching (full match over prefix match)
if (traits.getLastToken) {
const aLastToken = traits.getLastToken(a), bLastToken = traits.getLastToken(b);
const aDidPrefixMatchByTokenType = aLastToken.isTokenPrefixMatching && tokenTypePrefixMatchingPolicy[aLastToken.definition.type] === TokenTypePrefixMatchingPolicy.AllowOnlyAtInputEnd;
const bDidPrefixMatchByTokenType = bLastToken.isTokenPrefixMatching && tokenTypePrefixMatchingPolicy[bLastToken.definition.type] === TokenTypePrefixMatchingPolicy.AllowOnlyAtInputEnd;
if (aDidPrefixMatchByTokenType !== bDidPrefixMatchByTokenType) return aDidPrefixMatchByTokenType ? 1 : -1;
}
// Prefer results that matched fewer discontinuous ranges over more
const aRangeCount = traits.getRangeCount(a), bRangeCount = traits.getRangeCount(b);
if (aRangeCount !== bRangeCount) return aRangeCount - bRangeCount;
// Prefer results that matches first token in document earlier over later
const aFirstTokenDocumentOffset = traits.getFirstTokenDocumentOffset(a), bFirstTokenDocumentOffset = traits.getFirstTokenDocumentOffset(b);
if (aFirstTokenDocumentOffset.start !== bFirstTokenDocumentOffset.start) return aFirstTokenDocumentOffset.start - bFirstTokenDocumentOffset.start;
// Prefer results that have a higher match ratio (but don't distinguish similar ratios; that's what `matchRatioLevel` is for)
if (traits.getMatchRatioLevel) {
const aMatchRatioLevel = traits.getMatchRatioLevel(a), bMatchRatioLevel = traits.getMatchRatioLevel(b);
if (aMatchRatioLevel !== bMatchRatioLevel) return bMatchRatioLevel - aMatchRatioLevel;
}
// Prefer results whose last token occurs earlier in the document (if tied, ends earlier)
const aLastTokenDocumentOffset = traits.getLastTokenDocumentOffset(a), bLastTokenDocumentOffset = traits.getLastTokenDocumentOffset(b);
if (aLastTokenDocumentOffset.start !== bLastTokenDocumentOffset.start) return aLastTokenDocumentOffset.start - bLastTokenDocumentOffset.start;
if (aLastTokenDocumentOffset.end !== bLastTokenDocumentOffset.end) return aLastTokenDocumentOffset.end - bLastTokenDocumentOffset.end;
// Prefer results that have a higher match ratio (compared precisely)
const aMatchRatio = traits.getMatchRatio(a), bMatchRatio = traits.getMatchRatio(b);
if (aMatchRatio !== bMatchRatio) return bMatchRatio - aMatchRatio;
return traits.nextComparer?.(a, b) ?? 0;
};
interface IntermediateResult {
previousState?: IntermediateResult;
firstTokenDocumentOffset: OffsetSpan;
rangeCount: number;
tokenCount: number;
prefixMatchCount: number;
matchedTokenLength: number;
tokenId: number;
documentOffset: OffsetSpan;
inputOffset: OffsetSpan;
isTokenPrefixMatching: boolean;
}
const compareIntermediateResult = getComparerForTraits<IntermediateResult>({
getRangeCount: state => state.rangeCount,
getPrefixMatchCount: state => state.prefixMatchCount,
getFirstTokenDocumentOffset: state => state.firstTokenDocumentOffset,
getLastTokenDocumentOffset: state => state.documentOffset,
getMatchRatio: state => state.matchedTokenLength, // No need to divide by document length since intermediate results are for the same document
});
interface CandidateResult {
tokens: SearchResultToken[];
prefixMatchCount: number;
matchedTokenLength: number;
rangeCount: number;
}
const compareCandidateResult = getComparerForTraits<CandidateResult>({
getRangeCount: state => state.rangeCount,
getPrefixMatchCount: state => state.prefixMatchCount,
getFirstTokenDocumentOffset: state => state.tokens[0]!.documentOffset,
getLastTokenDocumentOffset: state => state.tokens[state.tokens.length - 1]!.documentOffset,
getLastToken: state => state.tokens[state.tokens.length - 1]!,
getMatchRatio: state => state.matchedTokenLength, // No need to divide by document length since candidate results are for the same document
});
export interface SearchResult {
documentId: number;
documentText: string;
documentCodePoints: string[];
tokens: SearchResultToken[];
prefixMatchCount: number;
rangeCount: number;
matchRatio: number;
matchRatioLevel: number;
}
const compareFinalResult = getComparerForTraits<SearchResult>({
getRangeCount: state => state.rangeCount,
getPrefixMatchCount: state => state.prefixMatchCount,
getFirstTokenDocumentOffset: state => state.tokens[0]!.documentOffset,
getLastTokenDocumentOffset: state => state.tokens[state.tokens.length - 1]!.documentOffset,
getLastToken: state => state.tokens[state.tokens.length - 1]!,
getMatchRatio: state => state.matchRatio,
getMatchRatioLevel: state => Math.round(state.matchRatio * 5),
nextComparer: (a, b) => a.documentText === b.documentText ? 0 : a.documentText < b.documentText ? -1 : 1,
});
const hasNonEmptyCharacters = (documentCodePoints: string[], start: number, end: number) => start !== end && !documentCodePoints.slice(start, end).every(char => /\s/.test(char));
export const searchInvertedIndex = (invertedIndex: LoadedInvertedIndex, text: string): SearchResult[] => {
const { documents, documentCodePoints, tokenDefinitions, tries } = invertedIndex;
const codePoints = [...toKatakana(normalizeByCodePoint(text))];
// dp[i] = docId => end => IntermediateResult, starts from dp[-1] (l === 0), ends at dp[N - 1] (r === N - 1)
const dp = Array.from({ length: codePoints.length }, () => new Map<number, Record<number, IntermediateResult>>());
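// Informal sketch of the recurrence: for every input span [l, r] that walks down one of the tries
// to a matching token, and for every occurrence of that token in a document, extend the best
// intermediate result of that document which consumed input [0, l) and ended at or before the
// occurrence's start; only the best state per (document, occurrence end) is kept.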
for (let l = 0; l < codePoints.length; l++) {
if (l !== 0 && dp[l - 1]!.size === 0) continue; // No documents match input from beginning to this position
let romajiNode: TrieNode | undefined = tries.romaji;
let kanaNode: TrieNode | undefined = tries.kana;
let otherNode: TrieNode | undefined = tries.other;
for (let r = l; r < codePoints.length && (romajiNode || kanaNode || otherNode); r++) { // [l, r]
const codePoint = codePoints[r]!;
romajiNode = traverseTrieStep(romajiNode, codePoint, IGNORABLE_CODE_POINTS);
kanaNode = traverseTrieStep(kanaNode, codePoint, IGNORABLE_CODE_POINTS);
otherNode = traverseTrieStep(otherNode, codePoint, IGNORABLE_CODE_POINTS);
const reachingInputEnd = r === codePoints.length - 1;
const matchingTokenIds = new Set([
// Allow prefix matching of romaji/other tokens if we're at the end of the input
...getTrieNodeTokenIds(romajiNode, shouldAllowPrefixMatching(TokenType.Romaji, reachingInputEnd)),
...getTrieNodeTokenIds(kanaNode, shouldAllowPrefixMatching(TokenType.Kana, reachingInputEnd)),
...getTrieNodeTokenIds(otherNode, reachingInputEnd),
]);
for (const tokenId of matchingTokenIds) for (const { documentId, offsets } of tokenDefinitions[tokenId]!.references) {
const isTokenPrefixMatching = !romajiNode?.tokenIds.includes(tokenId) && !kanaNode?.tokenIds.includes(tokenId) && !otherNode?.tokenIds.includes(tokenId);
const previousMatchesOfDocument = dp[l - 1]?.get(documentId);
if (l !== 0 && !previousMatchesOfDocument) continue;
for (const documentOffset of offsets) {
const { start: currentStart, end: currentEnd } = documentOffset;
const contributeNextMatchingState = (previousState: IntermediateResult | undefined) => {
const nextMatchingMap = dp[r]!;
let nextMatchesOfDocument = nextMatchingMap.get(documentId);
if (!nextMatchesOfDocument) {
nextMatchesOfDocument = Object.create(null) as Record<number, IntermediateResult>;
nextMatchingMap.set(documentId, nextMatchesOfDocument);
}
const oldResult = nextMatchesOfDocument[currentEnd];
const inputOffset = { start: l, end: r + 1 };
const newResult: IntermediateResult = {
previousState,
firstTokenDocumentOffset: previousState?.firstTokenDocumentOffset ?? documentOffset,
rangeCount: !previousState ? 1
: (previousState.rangeCount + (hasNonEmptyCharacters(documentCodePoints[documentId]!, previousState.documentOffset.end, currentStart) ? 1 : 0)),
tokenCount: (previousState?.tokenCount ?? 0) + 1,
prefixMatchCount: (previousState?.prefixMatchCount ?? 0) + (isTokenPrefixMatching ? 1 : 0),
matchedTokenLength: (previousState?.matchedTokenLength ?? 0) + getSpanLength(documentOffset) *
Math.min(isTokenPrefixMatching ? getSpanLength(inputOffset) / tokenDefinitions[tokenId]!.codePointLength : Infinity, 1),
tokenId,
documentOffset,
inputOffset,
isTokenPrefixMatching,
};
nextMatchesOfDocument[currentEnd] = !oldResult || compareIntermediateResult(newResult, oldResult) < 0 ? newResult : oldResult;
};
if (l === 0) contributeNextMatchingState(undefined);
else for (const previousEnd in previousMatchesOfDocument) if (currentStart >= Number(previousEnd))
contributeNextMatchingState(previousMatchesOfDocument[previousEnd as unknown as number]!);
// Don't `break` here because keys of `previousMatchesOfDocument` are not necessarily ordered
}
}
}
}
// Build search results and sort documents
return [...dp[codePoints.length - 1]!.entries()].map<SearchResult>(([documentId, matches]) => {
const sortedMatches = Object.values(matches).map<CandidateResult>(match => {
const tokens: SearchResultToken[] = [];
// Build the token list by backtracking through previous states
let state: IntermediateResult | undefined = match;
while (state) {
tokens.unshift({
definition: tokenDefinitions[state.tokenId]!,
documentOffset: state.documentOffset, inputOffset: state.inputOffset,
isTokenPrefixMatching: state.isTokenPrefixMatching,
});
state = state.previousState;
}
return { tokens, prefixMatchCount: match.prefixMatchCount, matchedTokenLength: match.matchedTokenLength, rangeCount: match.rangeCount };
}).sort(compareCandidateResult);
const bestMatchOfDocument = sortedMatches[0]!;
const documentText = documents[documentId]!;
const matchRatio = bestMatchOfDocument.matchedTokenLength / documentCodePoints[documentId]!.length;
const matchRatioLevel = Math.round(matchRatio * 5);
return {
documentId,
documentText,
documentCodePoints: documentCodePoints[documentId]!,
tokens: bestMatchOfDocument.tokens,
prefixMatchCount: bestMatchOfDocument.prefixMatchCount,
rangeCount: bestMatchOfDocument.rangeCount,
matchRatio,
matchRatioLevel,
};
}).sort(compareFinalResult);
};
// For debugging
export const inspectSearchResult = (resultDocument: SearchResult, htmlHighlight: boolean) => {
const { documentText, tokens, rangeCount, matchRatio, matchRatioLevel } = resultDocument;
const escapeHtml = (s: string) => s.replaceAll('&', '&amp;').replaceAll('<', '&lt;').replaceAll('>', '&gt;');
const escapedText = htmlHighlight ? highlightSearchResult(resultDocument).map(part =>
typeof part === 'string' ? escapeHtml(part) : `<u><b>${escapeHtml(part.highlight)}</b></u>`).join('') : JSON.stringify(documentText);
const description = ` (${rangeCount} ranges, ${Math.round(matchRatio * 10000) / 10000} => L${matchRatioLevel})`;
return [
escapedText + (htmlHighlight ? `<code>${description}</code>` : description),
...tokens.map(token => {
let escapedTokenText = JSON.stringify(token.definition.text);
let escapedDocumentText = JSON.stringify([...documentText].slice(token.documentOffset.start, token.documentOffset.end).join(''));
if (htmlHighlight) {
escapedTokenText = escapeHtml(escapedTokenText);
escapedDocumentText = escapeHtml(escapedDocumentText);
}
const line = ` ${TokenType[token.definition.type]}: ${escapedTokenText} -> ${escapedDocumentText}${token.isTokenPrefixMatching ? ' (prefix match)' : ''}`;
return htmlHighlight ? `<code>${line}</code>` : line;
}),
'',
].join('\n');
};
+58
View File
@@ -0,0 +1,58 @@
import type { TrieNode } from '../common';
export const deserializeTrie = (data: number[]) => {
const nodes: TrieNode[] = [];
const getNode = (id: number) => nodes[id - 1] ??= { parent: undefined, children: new Map(), tokenIds: [], subTreeTokenIds: [] };
let currentId = 0;
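// The flat layout mirrors serializeTrie in the indexer: node IDs start at 1, and each node record
// is [parent id (0 for root), child code points..., child node ids..., then token IDs encoded as
// -(tokenId + 1), or a single 0 when the node has none].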
for (let i = 0; i < data.length;) {
const node = getNode(++currentId);
const parentId = data[i++]!;
node.parent = parentId !== 0 ? getNode(parentId) : undefined;
let endOfChildren = i;
while (endOfChildren < data.length && data[endOfChildren]! > 0) endOfChildren++;
const numberOfChildren = (endOfChildren - i) / 2;
for (let j = i; j < i + numberOfChildren; j++) {
const codePoint = data[j]!;
const child = getNode(data[j + numberOfChildren]!);
node.children.set(codePoint, child);
}
i = endOfChildren;
if (data[i] === 0) i++; // No token IDs
else while (i < data.length && data[i]! < 0) node.tokenIds.push(-data[i++]! - 1);
}
const root = nodes[0]!;
// DFS to construct code point paths for each token
const tokenCodePoints = new Map<number, string[]>();
const currentCodePoints: string[] = [];
const dfsCodePoints = (node: TrieNode) => {
for (const tokenId of node.tokenIds) tokenCodePoints.set(tokenId, [...currentCodePoints]);
for (const [codePoint, child] of node.children.entries()) {
if (child.parent !== node) continue; // Skip grafted paths as these are not the canonical representation of the tokens
currentCodePoints.push(String.fromCodePoint(codePoint));
dfsCodePoints(child);
currentCodePoints.pop();
}
};
dfsCodePoints(root);
// DFS to construct subTreeTokenIds for each node
const visitedNodes = new Set<TrieNode>();
const dfsSubTreeTokenIds = (node: TrieNode) => {
if (visitedNodes.has(node)) return node.subTreeTokenIds;
visitedNodes.add(node);
node.subTreeTokenIds = [...node.tokenIds, ...new Set([...node.children.values()].flatMap(child => dfsSubTreeTokenIds(child)))];
return node.subTreeTokenIds;
};
dfsSubTreeTokenIds(root);
return {
root,
tokenCodePoints,
};
};
export const getTrieNodeTokenIds = (node: TrieNode | undefined, includeSubTree: boolean) =>
(includeSubTree ? node?.subTreeTokenIds : node?.tokenIds) ?? [];
+23
View File
@@ -0,0 +1,23 @@
{
"compilerOptions": {
"target": "ESNext",
"jsx": "preserve",
"lib": ["DOM", "DOM.Iterable", "ESNext", "WebWorker"],
"module": "ESNext",
"moduleResolution": "Bundler",
"noUncheckedIndexedAccess": true,
"resolveJsonModule": true,
"allowJs": true,
"strict": true,
"strictNullChecks": true,
"noEmit": true,
"esModuleInterop": true,
"forceConsistentCasingInFileNames": true,
"isolatedModules": true,
"skipLibCheck": true,
"rootDir": ".",
"outDir": "dist"
},
"include": ["src/**/*.ts"],
"exclude": ["dist", "node_modules"]
}
+15
View File
@@ -0,0 +1,15 @@
import { defineConfig } from 'tsdown';
export default defineConfig({
entry: [
'./src/index.ts',
'./src/searcher/index.ts',
'./src/indexer/index.ts',
'./src/common/index.ts',
],
dts: true,
unused: true,
fixedExtension: true,
unbundle: true,
sourcemap: true,
});
+6817
View File
File diff suppressed because it is too large
+9
View File
@@ -0,0 +1,9 @@
packages:
- packages/*
- apps/*
nodeLinker: hoisted
onlyBuiltDependencies:
- '@swc/core'
- unrs-resolver