
Commit 84cb6e8

adil-a and NeMo Bot authored and committed
feat: passthrough if inputs_embeds passed into nanov3 (#1261)
* fix

Signed-off-by: adil-a <adil.asif2000@hotmail.com>

* rename

Signed-off-by: adil-a <adil.asif2000@hotmail.com>

* unit tests

Signed-off-by: adil-a <adil.asif2000@hotmail.com>

---------

Signed-off-by: adil-a <adil.asif2000@hotmail.com>
Signed-off-by: NeMo Bot <nemo-bot@nvidia.com>
1 parent df68a08 commit 84cb6e8

2 files changed: 107 additions & 5 deletions

File tree

nemo_automodel/components/models/nemotron_v3/model.py

Lines changed: 12 additions & 5 deletions
@@ -99,25 +99,32 @@ def __init__(

     def forward(
         self,
-        input_ids: torch.LongTensor,
+        input_ids: torch.LongTensor | None = None,
         *,
         attention_mask: torch.Tensor | None = None,
         causal_mask_mapping: dict[str, torch.Tensor] | None = None,
+        inputs_embeds: torch.Tensor | None = None,
         **kwargs: Any,
     ) -> torch.Tensor:
         """Forward pass through the model.

         Args:
-            input_ids: Input token IDs [batch_size, seq_len]
+            input_ids: Input token IDs [batch_size, seq_len] (optional)
             attention_mask: 2D padding mask [batch_size, seq_len] (1=real, 0=padding)
             causal_mask_mapping: Dict with precomputed 4D causal masks for attention layers
+            inputs_embeds: Input embeddings [batch_size, seq_len, hidden_size] (optional)
             **kwargs: Additional arguments (ignored)

         Returns:
             Hidden states tensor [batch_size, seq_len, hidden_size]
         """
         # Get embeddings
-        hidden_states = self.embed_tokens(input_ids)
+        if inputs_embeds is None:
+            if input_ids is None:
+                raise ValueError("input_ids must be provided if inputs_embeds is not provided")
+            hidden_states = self.embed_tokens(input_ids)
+        else:
+            hidden_states = inputs_embeds

         # TODO: attention mask currently does not work. A default causal mask is applied.
@@ -244,7 +251,7 @@ def __init__(

     def forward(
         self,
-        input_ids: torch.LongTensor,
+        input_ids: torch.LongTensor | None = None,
         *,
         attention_mask: torch.Tensor | None = None,
         causal_mask_mapping: dict[str, torch.Tensor] | None = None,
@@ -253,7 +260,7 @@ def forward(
         """Forward pass with optional loss computation.

         Args:
-            input_ids: Input token IDs [batch_size, seq_len]
+            input_ids: Input token IDs [batch_size, seq_len] (optional)
             attention_mask: 2D padding mask [batch_size, seq_len]
             causal_mask_mapping: Dict with precomputed 4D causal masks
             **kwargs: Additional arguments
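
In effect, the change gives the model two mutually exclusive entry points. A minimal usage sketch (assuming a `config` exposing `hidden_size` and `vocab_size` and a `backend` value, as in the test fixtures below):

import torch
from nemo_automodel.components.models.nemotron_v3.model import NemotronV3Model

model = NemotronV3Model(config, backend=backend).to(torch.bfloat16)
batch_size, seq_len = 2, 8

# Path 1: pass token IDs; the model embeds them via self.embed_tokens.
input_ids = torch.randint(0, config.vocab_size, (batch_size, seq_len))
hidden = model(input_ids)

# Path 2: pass precomputed embeddings; embed_tokens is skipped entirely.
inputs_embeds = torch.randn(batch_size, seq_len, config.hidden_size, dtype=torch.bfloat16)
hidden = model(inputs_embeds=inputs_embeds)

# Passing neither raises:
# ValueError: input_ids must be provided if inputs_embeds is not provided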

tests/unit_tests/models/nemotron_v3/test_nemotron_v3_model.py

Lines changed: 95 additions & 0 deletions
@@ -187,6 +187,76 @@ def test_model_forward_with_causal_mask_mapping(self, config, backend):

         assert output.shape == (batch_size, seq_len, config.hidden_size)

+    def test_model_forward_with_inputs_embeds(self, config, backend):
+        """Test model forward pass with inputs_embeds instead of input_ids."""
+        from nemo_automodel.components.models.nemotron_v3.model import NemotronV3Model
+
+        model = NemotronV3Model(config, backend=backend)
+        model = model.to(torch.bfloat16)
+
+        batch_size, seq_len = 2, 8
+        inputs_embeds = torch.randn(batch_size, seq_len, config.hidden_size, dtype=torch.bfloat16)
+
+        output = model(inputs_embeds=inputs_embeds)
+
+        assert output.shape == (batch_size, seq_len, config.hidden_size)
+
+    def test_model_forward_inputs_embeds_bypasses_embedding(self, config, backend):
+        """Test that inputs_embeds bypasses the embedding layer."""
+        from nemo_automodel.components.models.nemotron_v3.model import NemotronV3Model
+
+        model = NemotronV3Model(config, backend=backend)
+        model = model.to(torch.bfloat16)
+
+        batch_size, seq_len = 2, 8
+        inputs_embeds = torch.randn(batch_size, seq_len, config.hidden_size, dtype=torch.bfloat16)
+
+        # Should work even with input_ids=None (the default)
+        output = model(input_ids=None, inputs_embeds=inputs_embeds)
+
+        assert output.shape == (batch_size, seq_len, config.hidden_size)
+
+    def test_model_forward_inputs_embeds_takes_precedence(self, config, backend):
+        """Test that inputs_embeds takes precedence over input_ids when both provided."""
+        from nemo_automodel.components.models.nemotron_v3.model import NemotronV3Model
+
+        model = NemotronV3Model(config, backend=backend)
+        model = model.to(torch.bfloat16)
+
+        batch_size, seq_len = 2, 8
+        input_ids = torch.randint(0, config.vocab_size, (batch_size, seq_len))
+        inputs_embeds = torch.randn(batch_size, seq_len, config.hidden_size, dtype=torch.bfloat16)
+
+        # When both are provided, inputs_embeds should be used (input_ids ignored)
+        output = model(input_ids, inputs_embeds=inputs_embeds)
+
+        assert output.shape == (batch_size, seq_len, config.hidden_size)
+
+    def test_model_forward_no_input_ids_no_inputs_embeds_raises(self, config, backend):
+        """Test that ValueError is raised when neither input_ids nor inputs_embeds is provided."""
+        from nemo_automodel.components.models.nemotron_v3.model import NemotronV3Model
+
+        model = NemotronV3Model(config, backend=backend)
+        model = model.to(torch.bfloat16)
+
+        with pytest.raises(ValueError, match="input_ids must be provided if inputs_embeds is not provided"):
+            model(input_ids=None)
+
+    def test_model_forward_inputs_embeds_with_mask(self, config, backend):
+        """Test model forward pass with inputs_embeds and attention mask."""
+        from nemo_automodel.components.models.nemotron_v3.model import NemotronV3Model
+
+        model = NemotronV3Model(config, backend=backend)
+        model = model.to(torch.bfloat16)
+
+        batch_size, seq_len = 2, 8
+        inputs_embeds = torch.randn(batch_size, seq_len, config.hidden_size, dtype=torch.bfloat16)
+        attention_mask = torch.ones(batch_size, seq_len)
+
+        output = model(inputs_embeds=inputs_embeds, attention_mask=attention_mask)
+
+        assert output.shape == (batch_size, seq_len, config.hidden_size)
+
     def test_model_moe_config_creation(self, config, backend):
         """Test that model creates MoE config correctly."""
         from nemo_automodel.components.models.nemotron_v3.model import NemotronV3Model
@@ -307,6 +377,31 @@ def test_causal_lm_forward_float32_logits(self, config, backend):

         assert logits.dtype == torch.float32

+    def test_causal_lm_forward_with_inputs_embeds(self, config, backend):
+        """Test causal LM forward pass with inputs_embeds."""
+        from nemo_automodel.components.models.nemotron_v3.model import NemotronHForCausalLM
+
+        model = NemotronHForCausalLM(config, backend=backend)
+        model = model.to(torch.bfloat16)
+
+        batch_size, seq_len = 2, 8
+        inputs_embeds = torch.randn(batch_size, seq_len, config.hidden_size, dtype=torch.bfloat16)
+
+        logits = model(inputs_embeds=inputs_embeds)
+
+        assert logits.shape == (batch_size, seq_len, config.vocab_size)
+        assert logits.dtype == torch.float32
+
+    def test_causal_lm_forward_no_input_ids_no_inputs_embeds_raises(self, config, backend):
+        """Test that ValueError is raised when neither input_ids nor inputs_embeds is provided."""
+        from nemo_automodel.components.models.nemotron_v3.model import NemotronHForCausalLM
+
+        model = NemotronHForCausalLM(config, backend=backend)
+        model = model.to(torch.bfloat16)
+
+        with pytest.raises(ValueError, match="input_ids must be provided if inputs_embeds is not provided"):
+            model()
+
     def test_causal_lm_from_config(self, config, backend):
         """Test from_config classmethod."""
         from nemo_automodel.components.models.nemotron_v3.model import NemotronHForCausalLM
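
One scenario this passthrough enables (a hedged sketch, not part of this commit): soft-prompt style training, where trainable embeddings are concatenated with token embeddings before the forward pass. Here `config` and `backend` are assumed to be the same fixtures used in the tests above, and `num_virtual_tokens` is an illustrative choice.

import torch
from nemo_automodel.components.models.nemotron_v3.model import NemotronV3Model

model = NemotronV3Model(config, backend=backend).to(torch.bfloat16)

# A small trainable prompt of shape [num_virtual_tokens, hidden_size].
num_virtual_tokens = 4
soft_prompt = torch.nn.Parameter(
    torch.randn(num_virtual_tokens, config.hidden_size, dtype=torch.bfloat16)
)

input_ids = torch.randint(0, config.vocab_size, (2, 8))
token_embeds = model.embed_tokens(input_ids)

# Prepend the trainable prompt to every sequence in the batch.
inputs_embeds = torch.cat(
    [soft_prompt.unsqueeze(0).expand(token_embeds.size(0), -1, -1), token_embeds],
    dim=1,
)

hidden = model(inputs_embeds=inputs_embeds)  # gradients can flow back into soft_prompt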
