manticoresoftware · donhardman · Jun 21, 2026 · Jun 21, 2026
diff --git a/embeddings/Cargo.lock b/embeddings/Cargo.lock
diff --git a/embeddings/Cargo.toml b/embeddings/Cargo.toml
@@ -7,6 +7,7 @@ edition = "2021"
 # For local dev with ../../candle, add a [patch] section to use path deps.
 [dependencies]
 tokenizers = "0.15.2"
+unicode-segmentation = "1"
 hf-hub = { git = "https://github.com/huggingface/hf-hub.git", rev = "ac22200ea0b5af4d8c362f699be0340647b19060", default-features = false,features = ["ureq"] }
 anyhow = "1.0.81"
 serde_json = "1.0.114"

diff --git a/embeddings/manticoresearch_text_embeddings.h b/embeddings/manticoresearch_text_embeddings.h
@@ -9,6 +9,15 @@
 #include <ostream>
 #include <new>
 
+/// Chunking strategy, mirrored as a `u32` across the FFI in [`ChunkSettings`].
+constexpr static const uint32_t STRATEGY_NONE = 0;
+
+constexpr static const uint32_t STRATEGY_FIXED = 1;
+
+constexpr static const uint32_t STRATEGY_RECURSIVE = 2;
+
+constexpr static const uint32_t STRATEGY_SENTENCE = 3;
+
 struct TextModelResult {
   void *m_pModel;
   char *m_szError;
@@ -66,6 +75,58 @@ using ValidateApiKeyFn = char*(*)(const TextModelWrapper*);
 /// for returning owned strings to C/C++.
 using FreeStringFn = void(*)(char*);
 
+/// One emitted chunk's byte span into the original input document.
+struct ChunkSpan {
+  uintptr_t start;
+  uintptr_t end;
+};
+
+/// Maps one input document to its run of chunks in the flat embeddings/spans
+/// arrays: the document's chunks are `[first, first + count)`.
+struct DocChunks {
+  uintptr_t first;
+  uintptr_t count;
+};
+
+/// Result of [`TextModelWrapper::make_vect_embeddings_chunked`]: a flat array of
+/// chunk embeddings, a parallel array of byte spans, and a per-input-document
+/// grouping so the C++ caller can rebuild "these N chunks belong to document i".
+///
+struct ChunkedVecResult {
+  char *m_szError;
+  const FloatVec *m_tEmbedding;
+  uintptr_t emb_len;
+  uintptr_t emb_cap;
+  const ChunkSpan *m_tSpans;
+  uintptr_t spans_cap;
+  const DocChunks *m_tDocs;
+  uintptr_t docs_len;
+  uintptr_t docs_cap;
+};
+
+/// Chunking parameters. `#[repr(C)]` — passed straight across the FFI by the
+/// daemon, which owns the DDL surface and validates against the model.
+struct ChunkSettings {
+  /// One of the `STRATEGY_*` constants. `STRATEGY_NONE` ⇒ no chunking.
+  uint32_t strategy;
+  /// Target chunk size in tokens. `0` ⇒ use the model's max. Always clamped to
+  /// the model's real input limit.
+  uint32_t max_tokens;
+  /// Token overlap between consecutive chunks. `0` ⇒ none.
+  uint32_t overlap_tokens;
+  /// Hard cap on chunks per document. `0` ⇒ unlimited. Overflow merges the
+  /// tail into the last chunk (matches OpenSearch's `max_chunk_limit`).
+  uint32_t max_chunks;
+};
+
+using MakeVectEmbeddingsChunkedFn = ChunkedVecResult(*)(const TextModelWrapper*,
+                                                        const StringItem*,
+                                                        uintptr_t,
+                                                        const ChunkSettings*,
+                                                        int32_t);
+
+using FreeChunkedResultFn = void(*)(ChunkedVecResult);
+
 struct EmbedLib {
   uintptr_t version;
   const char *version_str;
@@ -77,6 +138,8 @@ struct EmbedLib {
   GetLenFn get_max_input_size;
   ValidateApiKeyFn validate_api_key;
   FreeStringFn free_string;
+  MakeVectEmbeddingsChunkedFn make_vect_embeddings_chunked;
+  FreeChunkedResultFn free_chunked_result;
 };
 
 extern "C" {