Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,9 @@ public class AINodeTestUtils {
new AbstractMap.SimpleEntry<>(
"chronos2", new FakeModelInfo("chronos2", "t5", "builtin", "active")),
new AbstractMap.SimpleEntry<>(
"moirai2", new FakeModelInfo("moirai2", "moirai", "builtin", "active")))
"moirai2", new FakeModelInfo("moirai2", "moirai", "builtin", "active")),
new AbstractMap.SimpleEntry<>(
"toto", new FakeModelInfo("toto", "toto", "builtin", "active")))
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));

public static final Map<String, FakeModelInfo> BUILTIN_MODEL_MAP;
Expand Down
13 changes: 13 additions & 0 deletions iotdb-core/ainode/iotdb/ainode/core/model/model_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,4 +158,17 @@ def __repr__(self):
},
transformers_registered=True,
),
"toto": ModelInfo(
model_id="toto",
category=ModelCategory.BUILTIN,
state=ModelStates.INACTIVE,
model_type="toto",
pipeline_cls="pipeline_toto.TotoPipeline",
repo_id="Datadog/Toto-Open-Base-1.0",
auto_map={
"AutoConfig": "configuration_toto.TotoConfig",
"AutoModelForCausalLM": "modeling_toto.TotoForPrediction",
},
transformers_registered=True,
),
}
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

from typing import List, Optional

from transformers import PretrainedConfig


class TotoConfig(PretrainedConfig):
    """
    Configuration class for the Toto time series forecasting model.

    Toto (Time Series Optimized Transformer for Observability) is a foundation model
    for multivariate time series forecasting developed by Datadog. It uses a decoder-only
    architecture with per-variate patch-based causal scaling, proportional time-variate
    factorized attention, and a Student-T mixture prediction head.

    Args:
        patch_size: Length of each input patch along the time axis.
        stride: Step between consecutive patches (equal to ``patch_size`` means
            non-overlapping patches).
        embed_dim: Dimensionality of the transformer embeddings.
        num_layers: Number of transformer decoder layers.
        num_heads: Number of attention heads per layer.
        mlp_hidden_dim: Hidden dimension of the feed-forward (MLP) sublayers.
        dropout: Dropout probability applied inside the transformer.
        spacewise_every_n_layers: Insert a space-wise (cross-variate) attention
            layer every N layers; the remaining layers attend time-wise.
        scaler_cls: Identifier of the input scaling strategy.
        output_distribution_classes: Names of the output distribution heads;
            defaults to ``["student_t_mixture"]`` when not provided.
        spacewise_first: Whether the first attention layer is space-wise.
        use_memory_efficient_attention: Enable memory-efficient attention kernels.
        stabilize_with_global: Stabilize per-variate scaling with global statistics.
        scale_factor_exponent: Exponent bounding the scale factors.
        **kwargs: Forwarded to :class:`transformers.PretrainedConfig`.

    Reference: https://github.com/DataDog/toto
    """

    model_type = "toto"

    def __init__(
        self,
        patch_size: int = 32,
        stride: int = 32,
        embed_dim: int = 1024,
        num_layers: int = 18,
        num_heads: int = 16,
        mlp_hidden_dim: int = 2816,
        dropout: float = 0.0,
        spacewise_every_n_layers: int = 3,
        scaler_cls: str = "per_variate_causal",
        output_distribution_classes: Optional[List[str]] = None,
        spacewise_first: bool = True,
        use_memory_efficient_attention: bool = True,
        stabilize_with_global: bool = True,
        scale_factor_exponent: float = 10.0,
        **kwargs,
    ):
        self.patch_size = patch_size
        self.stride = stride
        self.embed_dim = embed_dim
        self.num_layers = num_layers
        self.num_heads = num_heads
        self.mlp_hidden_dim = mlp_hidden_dim
        self.dropout = dropout
        self.spacewise_every_n_layers = spacewise_every_n_layers
        self.scaler_cls = scaler_cls
        # Explicit None check (rather than `x or default`) so that a caller
        # who deliberately passes a falsy value such as [] is not silently
        # overridden with the built-in default.
        if output_distribution_classes is None:
            output_distribution_classes = ["student_t_mixture"]
        self.output_distribution_classes = output_distribution_classes
        self.spacewise_first = spacewise_first
        self.use_memory_efficient_attention = use_memory_efficient_attention
        self.stabilize_with_global = stabilize_with_global
        self.scale_factor_exponent = scale_factor_exponent

        super().__init__(**kwargs)
128 changes: 128 additions & 0 deletions iotdb-core/ainode/iotdb/ainode/core/model/toto/modeling_toto.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

import torch

from iotdb.ainode.core.log import Logger

# Module-level logger shared by this file (Logger is the project's logging helper).
logger = Logger()


class TotoForPrediction(torch.nn.Module):
    """
    AINode-facing wrapper around the Toto forecasting model.

    Toto (Time Series Optimized Transformer for Observability) is a 151M parameter
    foundation model for multivariate time series forecasting. The real model lives
    in the ``toto-ts`` package and uses ``huggingface_hub.ModelHubMixin`` for its
    ``from_pretrained`` support rather than the ``transformers.PreTrainedModel``
    pattern; this thin adapter exposes the interface AINode's loader expects and
    simply delegates to the underlying package.

    Reference: https://huggingface.co/Datadog/Toto-Open-Base-1.0
    """

    def __init__(self, toto_model):
        """
        Wrap an already-loaded Toto model instance.

        Args:
            toto_model: A ``toto.model.toto.Toto`` instance.
        """
        super().__init__()
        self.toto = toto_model

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
        """
        Load a Toto model from a local directory or HuggingFace Hub repository.

        Delegates to ``toto.model.toto.Toto.from_pretrained()``, which relies on
        ``ModelHubMixin`` to fetch the weights and configuration.

        Args:
            pretrained_model_name_or_path (str): Local directory holding
                ``config.json`` and ``model.safetensors``, or a HuggingFace Hub
                repo ID (e.g., ``Datadog/Toto-Open-Base-1.0``).
            **kwargs: Extra keyword arguments forwarded to the underlying loader.

        Returns:
            TotoForPrediction: Wrapper around the loaded Toto model.
        """
        # Imported lazily so the module can be imported without toto-ts installed.
        from toto.model.toto import Toto

        loaded = Toto.from_pretrained(pretrained_model_name_or_path, **kwargs)
        logger.info(f"Loaded Toto model from {pretrained_model_name_or_path}")
        return cls(loaded)

    @classmethod
    def from_config(cls, config):
        """
        Build a freshly initialized Toto model from a configuration object
        (used when training from scratch).

        Args:
            config: A ``TotoConfig`` or any object exposing compatible attributes.

        Returns:
            TotoForPrediction: Wrapper around the newly constructed Toto model.
        """
        # Imported lazily so the module can be imported without toto-ts installed.
        from toto.model.toto import Toto

        # Fallbacks mirror the Toto-Open-Base-1.0 hyper-parameters; each value
        # is used only when the config object lacks the corresponding attribute.
        fallbacks = {
            "patch_size": 32,
            "stride": 32,
            "embed_dim": 1024,
            "num_layers": 18,
            "num_heads": 16,
            "mlp_hidden_dim": 2816,
            "dropout": 0.0,
            "spacewise_every_n_layers": 3,
            "scaler_cls": "per_variate_causal",
            "output_distribution_classes": ["student_t_mixture"],
            "spacewise_first": True,
            "use_memory_efficient_attention": True,
            "stabilize_with_global": True,
            "scale_factor_exponent": 10.0,
        }
        init_kwargs = {
            name: getattr(config, name, default)
            for name, default in fallbacks.items()
        }
        return cls(Toto(**init_kwargs))

    @property
    def backbone(self):
        """
        The underlying ``TotoBackbone`` used for inference.

        Returns:
            The ``TotoBackbone`` instance held by the wrapped Toto model.
        """
        return self.toto.model

    @property
    def device(self):
        """
        Device on which the wrapped model's parameters live.

        Returns:
            torch.device: Device of the model parameters.
        """
        return self.toto.device
66 changes: 66 additions & 0 deletions iotdb-core/ainode/iotdb/ainode/core/model/toto/pipeline_toto.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import torch

from iotdb.ainode.core.inference.pipeline.basic_pipeline import ForecastPipeline
from iotdb.ainode.core.model.toto.data.util.dataset import MaskedTimeseries


class TotoPipeline(ForecastPipeline):
    """
    Forecast pipeline adapting AINode batch inputs to the Toto model.

    ``preprocess`` converts each raw target tensor into the ``MaskedTimeseries``
    structure Toto consumes; ``forecast`` runs the model per item and returns
    the mean prediction; ``postprocess`` defers to the base pipeline.
    """

    def __init__(self, model_info, **model_kwargs):
        super().__init__(model_info, **model_kwargs)

    def preprocess(self, inputs, **infer_kwargs):
        """Wrap each input's ``targets`` tensor in a ``MaskedTimeseries``."""
        super().preprocess(inputs, **infer_kwargs)

        prepared = []
        for entry in inputs:
            series = entry["targets"]
            # Promote a single univariate series to shape (1, length).
            if series.ndim == 1:
                series = series.unsqueeze(0)

            n_variates, length = series.shape
            dev = series.device

            # All points are observed, so the padding mask is all-ones.
            pad_mask = torch.ones(
                (n_variates, length), dtype=torch.bool, device=dev
            )
            # Each variate gets its own id, broadcast across the time axis.
            id_mask = (
                torch.arange(n_variates, dtype=torch.int64, device=dev)
                .unsqueeze(-1)
                .expand(n_variates, length)
            )
            # Synthetic timestamps: 0..length-1 seconds, identical per variate.
            timestamps = (
                torch.arange(length, dtype=torch.int64, device=dev)
                .unsqueeze(0)
                .expand(n_variates, length)
            )
            # Uniform one-second sampling interval for every variate.
            intervals = torch.ones(n_variates, dtype=torch.int64, device=dev)

            prepared.append(
                MaskedTimeseries(
                    series=series,
                    padding_mask=pad_mask,
                    id_mask=id_mask,
                    timestamp_seconds=timestamps,
                    time_interval_seconds=intervals,
                    num_exogenous_variables=0,
                )
            )

        return prepared

    def forecast(self, inputs, **infer_kwargs):
        """Run the Toto model per item and collect the mean forecasts."""
        horizon = infer_kwargs.get("output_length", 96)
        sample_count = infer_kwargs.get("num_samples", None)

        results = []
        for ts in inputs:
            prediction = self.model.forecast(
                ts,
                prediction_length=horizon,
                num_samples=sample_count,
            )
            mean = prediction.mean
            # Drop a singleton leading batch dimension if present.
            if mean.ndim == 3 and mean.shape[0] == 1:
                mean = mean.squeeze(0)
            results.append(mean)
        return results

    def postprocess(self, outputs, **infer_kwargs):
        """Delegate to the base pipeline's postprocessing."""
        return super().postprocess(outputs, **infer_kwargs)
Loading