NVIDIA-NeMo · Edresson · Apr 28, 2026
diff --git a/examples/speechlm2/conf/duplex_eartts.yaml b/examples/speechlm2/conf/duplex_eartts.yaml
@@ -3,6 +3,7 @@ model:
   pretrained_audio_codec: ???  # to be released
   pretrained_tts_model: null
   scoring_asr: stt_en_fastconformer_transducer_large # used only in validation/evaluation
+  trust_remote_code: true  # opt-in flag passed to the tokenizer loader; the model falls back to false when omitted
 
   # Regexp (re.compile) patterns matching parameters to be frozen.
   freeze_params:

diff --git a/examples/speechlm2/conf/duplex_stt.yaml b/examples/speechlm2/conf/duplex_stt.yaml
@@ -5,6 +5,7 @@ model:
   scoring_asr: stt_en_fastconformer_transducer_large  # used only in validation/evaluation
 
   pretrained_weights: True  # When False, we use pretrained_name to load the architecture, but with random init
+  trust_remote_code: true  # opt-in flag passed to the pretrained LLM loader; the model falls back to false when omitted
 
   prevent_freeze_params: []  # Use to make specific submodules trainable; overrides freeze_params
 

diff --git a/nemo/collections/speechlm2/models/duplex_ear_tts.py b/nemo/collections/speechlm2/models/duplex_ear_tts.py
@@ -95,7 +95,7 @@ def __init__(self, cfg: dict) -> None:
         self.tokenizer = AutoTokenizer(
             self.cfg.pretrained_lm_name,
             use_fast=True,
-            trust_remote_code=True,
+            trust_remote_code=self.cfg.get("trust_remote_code", False),
             bos_token=self.cfg.get("bos_token", None),
             eos_token=self.cfg.get("eos_token", None),
             pad_token=self.cfg.get("pad_token", None),

diff --git a/nemo/collections/speechlm2/models/duplex_stt_model.py b/nemo/collections/speechlm2/models/duplex_stt_model.py
@@ -83,7 +83,7 @@ def __init__(self, cfg: dict) -> None:
         llm = load_pretrained_hf(
             self.cfg.pretrained_llm,
             pretrained_weights=self.cfg.pretrained_weights,
-            trust_remote_code=self.cfg.get("trust_remote_code", True),
+            trust_remote_code=self.cfg.get("trust_remote_code", False),
         ).train()
 
         # Initialize tokenizer with optional special tokens from config

diff --git a/tests/collections/speechlm2/test_duplex_eartts.py b/tests/collections/speechlm2/test_duplex_eartts.py
@@ -36,6 +36,7 @@
         "pretrained_lm_name": "nvidia/NVIDIA-Nemotron-Nano-9B-v2",
         "pretrained_ae_dir": None,
         "pretrained_tts_model": None,
+        "trust_remote_code": True,
         "scoring_asr": "stt_en_fastconformer_transducer_large",
         "freeze_params": [
             r"^audio_codec\..+$",  # Keep audio codec frozen as it only provides supervision for training.

diff --git a/tests/collections/speechlm2/test_duplex_stt.py b/tests/collections/speechlm2/test_duplex_stt.py
@@ -49,6 +49,7 @@ def create_model(
         "model": {
             **resolve_pretrained_models(),
             "pretrained_weights": False,
+            "trust_remote_code": True,
             "audio_loss_weight": 1,
             "text_loss_weight": 3,
             "source_sample_rate": 16000,