Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/data_designer/config/sampler_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
from enum import Enum
from typing import Literal

import pandas as pd
from pydantic import Field, field_validator, model_validator
from typing_extensions import Self, TypeAlias

from data_designer import lazy_imports
from data_designer.config.base import ConfigBase
from data_designer.config.utils.constants import (
AVAILABLE_LOCALES,
Expand Down Expand Up @@ -113,7 +113,7 @@ class DatetimeSamplerParams(ConfigBase):
@classmethod
def _validate_param_is_datetime(cls, value: str) -> str:
try:
pd.to_datetime(value)
lazy_imports.pd.to_datetime(value)
except ValueError:
raise ValueError(f"Invalid datetime format: {value}")
return value
Expand Down
106 changes: 106 additions & 0 deletions src/data_designer/engine/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,108 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

"""
Engine module with fully automatic lazy loading.

This module automatically discovers ALL engine modules and their public classes/functions,
providing a facade that lazily imports components only when accessed. This significantly
improves import performance while requiring ZERO maintenance - just add a module and it's
automatically exported.

Note: Private modules (starting with _) are excluded from auto-discovery.
"""

import ast
import importlib
from pathlib import Path


def _discover_all_engine_exports() -> dict[str, tuple[str, str]]:
    """Automatically discover all public classes/functions in the engine package.

    Scans the engine directory recursively for all Python files, parses them
    with AST (without importing), and builds a mapping of all public exports.

    Returns:
        Dictionary mapping public names to (module_path, attribute_name) tuples.
    """
    exports: dict[str, tuple[str, str]] = {}
    engine_dir = Path(__file__).parent

    # Find all Python files in engine directory recursively
    for py_file in engine_dir.rglob("*.py"):
        # Skip __init__.py files and private modules (starting with _).
        # Check every path component below the engine dir, not just the file
        # name, so modules inside private subpackages (e.g. _internal/foo.py)
        # are excluded too, as the module docstring promises.
        if any(part.startswith("_") for part in py_file.relative_to(engine_dir).parts):
            continue

        # Convert file path to module path
        # e.g., dataset_builders/column_wise_builder.py -> data_designer.engine.dataset_builders.column_wise_builder
        rel_path = py_file.relative_to(engine_dir.parent)
        module_parts = list(rel_path.parts[:-1]) + [rel_path.stem]
        module_path = ".".join(["data_designer"] + module_parts)

        try:
            # Parse the Python file with AST (doesn't import it - fast!)
            tree = ast.parse(py_file.read_text(encoding="utf-8"), filename=str(py_file))
        except (OSError, SyntaxError, ValueError):
            # Unreadable or unparsable file: skip it silently rather than
            # letting one bad module break discovery for the whole package.
            continue

        # Find all top-level public classes and functions
        for node in tree.body:
            if isinstance(node, (ast.ClassDef, ast.FunctionDef, ast.AsyncFunctionDef)):
                name = node.name
                # Only export public items (no leading underscore); on a name
                # collision across modules, the first one discovered wins.
                if not name.startswith("_") and name not in exports:
                    exports[name] = (module_path, name)

    return exports


# Cache for lazy imports - built on first access (by __getattr__ or __dir__)
# and reused thereafter; maps public name -> (module_path, attribute_name).
_LAZY_IMPORTS_CACHE: dict[str, tuple[str, str]] | None = None


def __getattr__(name: str) -> object:
    """Lazily import engine components when accessed.

    On first access, automatically discovers all public classes/functions in the
    engine package. Subsequent accesses use the cached mapping for fast lookups.

    Args:
        name: The name of the attribute to import.

    Returns:
        The imported class, function, or object.

    Raises:
        AttributeError: If the attribute is not found in any engine module.
    """
    global _LAZY_IMPORTS_CACHE

    # First access: run discovery once and memoize the result.
    if _LAZY_IMPORTS_CACHE is None:
        _LAZY_IMPORTS_CACHE = _discover_all_engine_exports()

    try:
        module_path, attr_name = _LAZY_IMPORTS_CACHE[name]
    except KeyError:
        raise AttributeError(f"module 'data_designer.engine' has no attribute {name!r}") from None

    # Import the owning module on demand and hand back the requested attribute.
    return getattr(importlib.import_module(module_path), attr_name)


def __dir__() -> list[str]:
    """Return list of all available lazy imports for introspection."""
    global _LAZY_IMPORTS_CACHE

    # Make sure discovery has run so dir() reflects every available export.
    cache = _LAZY_IMPORTS_CACHE
    if cache is None:
        cache = _LAZY_IMPORTS_CACHE = _discover_all_engine_exports()
    return list(cache)
22 changes: 10 additions & 12 deletions src/data_designer/interface/data_designer.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

from __future__ import annotations

import logging
from pathlib import Path

import pandas as pd

# Lazy-loaded third-party and engine components via facades
from data_designer import engine, lazy_imports
from data_designer.config.analysis.dataset_profiler import DatasetProfilerResults
from data_designer.config.config_builder import DataDesignerConfigBuilder
from data_designer.config.default_model_settings import (
Expand All @@ -29,10 +31,6 @@
PREDEFINED_PROVIDERS,
)
from data_designer.config.utils.info import InfoType, InterfaceInfo
from data_designer.engine.analysis.dataset_profiler import (
DataDesignerDatasetProfiler,
DatasetProfilerConfig,
)
from data_designer.engine.compiler import compile_data_designer_config
from data_designer.engine.dataset_builders.artifact_storage import ArtifactStorage
from data_designer.engine.dataset_builders.column_wise_builder import ColumnWiseDatasetBuilder
Expand Down Expand Up @@ -221,7 +219,7 @@ def preview(

dropped_columns = raw_dataset.columns.difference(processed_dataset.columns)
if len(dropped_columns) > 0:
dataset_for_profiler = pd.concat([processed_dataset, raw_dataset[dropped_columns]], axis=1)
dataset_for_profiler = lazy_imports.pd.concat([processed_dataset, raw_dataset[dropped_columns]], axis=1)
else:
dataset_for_profiler = processed_dataset

Expand All @@ -233,7 +231,7 @@ def preview(

if builder.artifact_storage.processors_outputs_path.exists():
processor_artifacts = {
processor_config.name: pd.read_parquet(
processor_config.name: lazy_imports.pd.read_parquet(
builder.artifact_storage.processors_outputs_path / f"{processor_config.name}.parquet",
dtype_backend="pyarrow",
).to_dict(orient="records")
Expand Down Expand Up @@ -364,10 +362,10 @@ def _create_dataset_builder(
)

def _create_dataset_profiler(
self, config_builder: DataDesignerConfigBuilder, resource_provider: ResourceProvider
) -> DataDesignerDatasetProfiler:
return DataDesignerDatasetProfiler(
config=DatasetProfilerConfig(
self, config_builder: DataDesignerConfigBuilder, resource_provider: engine.ResourceProvider
) -> engine.DataDesignerDatasetProfiler:
return engine.DataDesignerDatasetProfiler(
config=engine.DatasetProfilerConfig(
column_configs=config_builder.get_column_configs(),
column_profiler_configs=config_builder.get_profilers(),
),
Expand Down
43 changes: 43 additions & 0 deletions src/data_designer/lazy_imports.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

"""
Lazy imports facade for heavy third-party dependencies.

This module provides a centralized facade that lazily imports heavy dependencies
(pandas, pyarrow, etc.) only when accessed, significantly improving import performance.

Usage:
from data_designer import lazy_imports

df = lazy_imports.pd.DataFrame(...)
schema = lazy_imports.pq.read_schema(...)
"""


def __getattr__(name: str) -> object:
"""Lazily import heavy third-party dependencies when accessed.

This allows fast imports of data_designer while deferring loading of heavy
libraries like pandas and pyarrow until they're actually needed.

Supported imports:
- pd: pandas module
- pq: pyarrow.parquet module
"""
if name == "pd":
import pandas as pd

return pd
elif name == "pq":
import pyarrow.parquet as pq

return pq

raise AttributeError(f"module 'data_designer.lazy_imports' has no attribute {name!r}")


# For type checking
def __dir__() -> list[str]:
"""Return list of available lazy imports."""
return ["pd", "pq"]
Loading