Commit 6396b09

fmt
Signed-off-by: Alexandros Koumparoulis <akoumparouli@nvidia.com>
1 parent 490505f

3 files changed: 11 additions & 8 deletions

nemo_automodel/components/distributed/megatron_fsdp.py

Lines changed: 9 additions & 6 deletions

@@ -16,8 +16,9 @@
 
 import torch
 import torch.distributed as dist
-from torch.distributed.device_mesh import DeviceMesh
 import torch.nn as nn
+from torch.distributed.device_mesh import DeviceMesh
+
 from nemo_automodel.components.distributed.config import MegatronFSDPConfig
 from nemo_automodel.components.distributed.parallelizer import (
     _get_parallel_plan,
@@ -29,6 +30,7 @@
 try:
     from megatron_fsdp import MegatronFSDP
     from megatron_fsdp.fully_shard import fully_shard_optimizer as megatron_fsdp_fully_shard_optimizer
+
     HAS_MEGATRON_FSDP = True
 except (ImportError, FileNotFoundError):
     # raise FileNotFoundError(
@@ -160,14 +162,15 @@ def parallelize(self, model, optimizer=None):
 
         return model, optimizer
 
+
 def fully_shard_optimizer(
-    model: nn.Module,
-    optimizer: torch.optim.Optimizer, preproc_state_dict_for_dcp_ckpt: bool = True
+    model: nn.Module, optimizer: torch.optim.Optimizer, preproc_state_dict_for_dcp_ckpt: bool = True
 ) -> torch.optim.Optimizer:
-    """
-    """
+    """ """
     if not isinstance(model, MegatronFSDP):
         return optimizer
     if not HAS_MEGATRON_FSDP:
-        raise ImportError("MegatronFSDP is not installed, please visit https://github.com/NVIDIA/Megatron-LM/tree/main/megatron/core/distributed/fsdp/src for more information")
+        raise ImportError(
+            "MegatronFSDP is not installed, please visit https://github.com/NVIDIA/Megatron-LM/tree/main/megatron/core/distributed/fsdp/src for more information"
+        )
     return megatron_fsdp_fully_shard_optimizer(optimizer)
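
For reference, the guarded-import pattern this file relies on can be exercised standalone. A minimal sketch, assuming only what the hunks above show: the module and symbol names come from the diff, while the HAS_MEGATRON_FSDP = False fallback and the nn.Module stub are illustrative assumptions (the commit elides the real except branch):

import torch
import torch.nn as nn

# Optional-dependency guard, as in megatron_fsdp.py: importing this module
# must not fail when the megatron_fsdp package is absent; the flag records
# availability so callers can raise a helpful error later.
try:
    from megatron_fsdp import MegatronFSDP
    from megatron_fsdp.fully_shard import fully_shard_optimizer as megatron_fsdp_fully_shard_optimizer

    HAS_MEGATRON_FSDP = True
except (ImportError, FileNotFoundError):
    HAS_MEGATRON_FSDP = False  # assumption: not shown in this hunk

    class MegatronFSDP(nn.Module):  # hypothetical stub so isinstance() below stays valid
        pass


def fully_shard_optimizer(
    model: nn.Module, optimizer: torch.optim.Optimizer, preproc_state_dict_for_dcp_ckpt: bool = True
) -> torch.optim.Optimizer:
    """Shard optimizer state only for MegatronFSDP-wrapped models."""
    if not isinstance(model, MegatronFSDP):
        return optimizer  # no-op for plain nn.Module models
    if not HAS_MEGATRON_FSDP:
        raise ImportError(
            "MegatronFSDP is not installed, please visit https://github.com/NVIDIA/Megatron-LM/tree/main/megatron/core/distributed/fsdp/src for more information"
        )
    return megatron_fsdp_fully_shard_optimizer(optimizer)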

nemo_automodel/recipes/base_recipe.py

Lines changed: 1 addition & 1 deletion

@@ -42,7 +42,7 @@
 from transformers.tokenization_utils import PreTrainedTokenizerBase
 
 from nemo_automodel.components.checkpoint.checkpointing import save_config
-from nemo_automodel.components.config.loader import config_to_yaml_str, ConfigNode
+from nemo_automodel.components.config.loader import ConfigNode, config_to_yaml_str
 from nemo_automodel.components.optim.scheduler import OptimizerParamScheduler
 from nemo_automodel.components.training.rng import StatefulRNG
 from nemo_automodel.components.training.step_scheduler import StepScheduler
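
This reorder is what an import sorter produces: with isort's default order_by_type behavior, CamelCase (class-style) names sort ahead of snake_case (function-style) names within a from-import list, so ConfigNode moves in front of config_to_yaml_str. A quick way to reproduce it, assuming isort is installed; whether this repo's fmt step invokes isort directly or ruff's import rules is not visible in the commit:

import isort

# isort's default ordering within a from-import list places class-style
# names before function-style names, matching the change in this hunk.
before = "from nemo_automodel.components.config.loader import config_to_yaml_str, ConfigNode\n"
print(isort.code(before))
# -> from nemo_automodel.components.config.loader import ConfigNode, config_to_yaml_str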

nemo_automodel/recipes/llm/train_ft.py

Lines changed: 1 addition & 1 deletion

@@ -48,11 +48,11 @@
 from nemo_automodel.components.datasets.llm.megatron_dataset import MegatronPretraining
 from nemo_automodel.components.datasets.llm.packed_sequence import pack_dataset
 from nemo_automodel.components.distributed.config import MegatronFSDPConfig
-from nemo_automodel.components.distributed.megatron_fsdp import fully_shard_optimizer
 from nemo_automodel.components.distributed.cp_utils import make_cp_batch_and_ctx
 from nemo_automodel.components.distributed.init_utils import (
     initialize_distributed,
 )
+from nemo_automodel.components.distributed.megatron_fsdp import fully_shard_optimizer
 from nemo_automodel.components.distributed.mesh import MeshContext
 from nemo_automodel.components.distributed.pipelining import AutoPipeline
 from nemo_automodel.components.distributed.utils import FirstRankPerNode, get_sync_ctx
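
The relocated import is the one train_ft.py uses to shard optimizer state. A hedged call-site sketch: only fully_shard_optimizer and its signature come from this commit; the toy model and optimizer stand in for whatever the recipe actually builds:

import torch
import torch.nn as nn

from nemo_automodel.components.distributed.megatron_fsdp import fully_shard_optimizer

# Stand-ins for the recipe's real model/optimizer construction (assumed).
model = nn.Linear(8, 8)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)

# Safe to call unconditionally: per the megatron_fsdp.py diff above, the
# function returns the optimizer unchanged unless the model is wrapped in
# MegatronFSDP.
optimizer = fully_shard_optimizer(model, optimizer, preproc_state_dict_for_dcp_ckpt=True)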
