progress towards working example

LeanderK · LeanderK · commit c7b3fc39a7f5 · 2025-04-17T15:33:29.000+01:00
diff --git a/pal/distribution/spline_distribution.py b/pal/distribution/spline_distribution.py
@@ -21,7 +21,15 @@ def compute_reordering_of_parameter_positions2d(
 ) -> Dict[int, int]:
     """
     Compute the reordering of the parameter positions for the polynomial so that
-    it conforms to the combination.
+    it conforms to the combination of univariate polynomials.
+
+    Parameters:
+    - deg (int): The degree of the polynomial.
+    - powers (torch.Tensor): A tensor of shape [n_mon, 2] containing the powers of the polynomial terms.
+      Each row represents a monomial with two variables.
+
+    Returns:
+    - Dict[int, int]: A mapping from the current parameter index to the reordered parameter index.
     """
     exponents_y0 = torch.arange(deg + 1)
     exponents_y1 = torch.arange(deg + 1)
@@ -478,7 +486,7 @@ def get_info_1d(
 
         return get_info_2d(self.knots, x)
 
-    def log_dens(self, x, eps=-1, with_indicator=False):
+    def log_dens(self, x, eps=-1, with_indicator=False) -> torch.Tensor:
         """
         Essentially computes log p(x) for the distribution, where p(x) is
         composed of:
diff --git a/pal/distribution/torch_polynomial.py b/pal/distribution/torch_polynomial.py
@@ -44,8 +44,8 @@ class TorchPolynomial(torch.nn.Module):
     using PyTorch tensors.
 
     Args:
-        coeffs (torch.Tensor): Tensor containing the coefficients of the polynomial.
-        powers (torch.Tensor): Tensor containing the powers of the polynomial.
+        coeffs (torch.Tensor): Tensor containing the coefficients of the polynomial. Shape: (num_terms,).
+        powers (torch.Tensor): Tensor containing the powers of the polynomial. Shape: (num_terms, num_vars).
         variable_map_dict (Dict[str, int] | frozendict[str, int]): Dictionary mapping variable names to their indices.
         absolute (bool): Whether to take the absolute value of the polynomial. Defaults to True.
     """
@@ -529,6 +529,16 @@ def calc_2d_spline_component(
     knot_idx: torch.Tensor,  # (2)
     params: torch.Tensor,  # (2, num_pieces, 4)
 ) -> torch.Tensor:
+    """
+    Calculate the 2D spline component for the given x and knot_idx.
+    The spline is the product of two cubic polynomials, one for each dimension.
+    Args:
+        x: The input tensor.
+        knot_idx: The indices of the knots.
+        params: The parameters of the spline.
+    Returns:
+        The calculated spline component.
+    """
     def per_param(
         p: torch.Tensor,  # (2, num_pieces)
     ) -> torch.Tensor:
@@ -576,7 +586,7 @@ def calc_log_mixture(
         lambda p: calc_2d_spline_component(x, knot_idx, p)
     )(
         m_params
-    )#.abs_()  # (num_mixtures)
+    )  # .abs_()  # (num_mixtures)
     m_normalization_coeff = m_normalization.sum(dim=-1).sum(
         dim=-1
     )  # (num_mixtures)
@@ -586,17 +596,4 @@ def calc_log_mixture(
         torch.log(densities_components) + torch.log(m_weights), dim=0
     )
 
-    # if eps != -1:
-    #     with torch.no_grad():
-    #         # mixture_poly[mixture_poly < eps] = eps
-    #         mixture_poly.clamp_(min=eps)
-
-    # mixture_poly_log = 2 * torch.log(mixture_poly) - torch.log(
-    #     m_normalization_coeff
-    # )
-
-    # mixture_poly_log = torch.log(mixture_poly ** 2)
-
-    # poly_log = torch.logsumexp(mixture_poly_log + torch.log(m_weights), dim=0)
-
     return poly_log
diff --git a/pal/training/train_mlp_sdd.py b/pal/training/train_mlp_sdd.py
@@ -0,0 +1,292 @@
+###############################################
+#
+# This file severs as an example of how to
+# train a MLP model using the SDD dataset using
+# the PAL library and a spline-distribution.
+#
+###############################################
+
+import pal.problem.sdd as csdd
+import pal.distribution.spline_distribution as spline
+from pal.wmi.compute_integral import integrate_distribution
+import torch
+import torch.nn as nn
+from typing import Callable, Any, Generic, TypeVar
+import numpy as np
+from torch.utils.data import DataLoader, TensorDataset
+from tqdm import tqdm
+import argparse
+
+T = TypeVar("T")
+
+
+class SimpleFC(Generic[T], nn.Module):
+    def __init__(
+        self,
+        input_size: int,
+        output_size: int,
+        hidden_sizes: list[int],
+        final_function: Callable[[torch.Tensor], tuple[torch.Tensor, ...]] | None = None,
+        final_module: nn.Module | None = None,
+    ) -> None:
+        super().__init__()
+        self.fcs = []
+        for i in range(len(hidden_sizes)):
+            if i == 0:
+                self.fcs.append(nn.Linear(input_size, hidden_sizes[i]))
+            else:
+                self.fcs.append(nn.Linear(hidden_sizes[i - 1], hidden_sizes[i]))
+        self.fcs.append(nn.Linear(hidden_sizes[-1], output_size))
+        self.fcs = nn.ModuleList(self.fcs)
+        self.final_function = final_function
+        self.final_module = final_module
+
+    def network(self, x: torch.Tensor) -> torch.Tensor:
+        for i in range(len(self.fcs) - 1):
+            x = self.fcs[i](x)
+            x = nn.functional.relu(x)
+        x = self.fcs[-1](x)
+        return x
+
+    def forward(self, x) -> T:
+        x = self.network(x)
+        if self.final_function is not None:
+            x = self.final_function(x)
+        if self.final_module is not None:
+            x = self.final_module(*x)
+        return x
+    
+    def __call__(self, *args, **kwds) -> T:
+        return super().__call__(*args, **kwds)
+
+
+def main(args: argparse.Namespace) -> None:
+    sdd = csdd.SDDSingleImageTrajectory(
+        img_id=args.img_id,
+        path="./data/sdd",
+    )
+
+    # load the constraints
+    lra_problem = sdd.create_constraints()
+
+    spline_distribution_builder = spline.SplineSQ2DBuilder(
+        constraints=lra_problem,
+        var_positions=sdd.get_y_vars(),
+        num_knots=args.num_knots,
+        num_mixtures=args.num_mixtures,
+    )
+
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+    # create the distribution
+    conditional_spline_dist = integrate_distribution(
+        d=spline_distribution_builder,
+        device=device,
+        precision=torch.float64,
+    )
+
+    shape_value, shape_derivative, shape_mixture_weights = (
+        conditional_spline_dist.parameter_shape()
+    )
+
+    # create the model
+    net_size = args.net_size
+    if net_size == "small":
+        hidden_size = [512, 512]
+    elif net_size == "medium":
+        hidden_size = [1024, 1024]
+    elif net_size == "large":
+        hidden_size = [2048, 2048]
+
+    input_size = np.prod(sdd.get_x_shape())
+
+    total_output_size = (
+        np.prod(shape_value)
+        + np.prod(shape_derivative)
+        + np.prod(shape_mixture_weights)
+    )
+
+    def reparam(out_nn: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+        param_mixture_weights = out_nn[:, :num_mixture_param].softmax(dim=-1)
+
+        param_dens_value = out_nn[:, num_mixture_param:(num_mixture_param + num_dens_knots_values)]
+        param_dens_value = param_dens_value.reshape(-1, *shape_value)
+
+        param_dens_derivative = out_nn[:, (num_mixture_param + num_dens_knots_values):]
+        param_dens_derivative = param_deriv_dens_scale * param_dens_derivative.reshape(-1, *shape_derivative)
+
+        return param_dens_value, param_dens_derivative, param_mixture_weights
+
+    model = SimpleFC[spline.SplineSQ2D](
+        input_size=input_size,
+        output_size=total_output_size,
+        hidden_sizes=hidden_size,
+        final_function=reparam,
+        final_module=conditional_spline_dist,
+    ).to(device)
+
+    # create the optimizer
+    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
+
+    print("Loading dataset")
+    dataset = sdd.load_dataset()
+    dataset_train = dataset.train
+    dataset_val = dataset.val
+    dataset_test = dataset.test
+    print(
+        f"Train size: {len(dataset_train)}, Val size: {len(dataset_val)}, Test size: {len(dataset_test)}"
+    )
+
+    # check if random seed is set
+    if args.seed is not None:
+        torch.manual_seed(args.seed)
+        np.random.seed(args.seed)
+
+    model.to(device)
+    conditional_spline_dist.to(device)
+    precision = torch.float64 if args.use_float64 else torch.float32
+    model.to(precision)
+
+    batch_size = args.batch_size
+
+    loader = DataLoader(
+        dataset_train,
+        batch_size=batch_size,
+        shuffle=True,
+        pin_memory=False,
+        num_workers=10,
+    )
+
+    loader_val = DataLoader(dataset_val, batch_size=batch_size)
+
+    loader_test = DataLoader(dataset_test, batch_size=batch_size)
+
+    if args.init_last_layer_positive:
+        num_mixture_param = np.prod(shape_mixture_weights)
+        num_dens_knots_values = np.prod(shape_value)
+
+        with torch.no_grad():
+            last_layer = model.fcs[-1]
+            pos_sub = 0.1 * torch.abs(
+                last_layer.weight.data[
+                    num_mixture_param:(num_mixture_param + num_dens_knots_values)
+                ]
+            )
+            last_layer.weight.data[
+                num_mixture_param:(num_mixture_param + num_dens_knots_values)
+            ] = pos_sub
+            last_layer.bias.data = torch.zeros_like(last_layer.bias.data)
+
+    epochs = args.epochs
+
+    param_deriv_dens_scale = 0.1
+
+    for epoch in tqdm(range(epochs), desc="Epochs"):
+        model.train()
+        for i, (x, y) in enumerate(tqdm(loader, desc="Training", leave=False)):
+            x = x.to(device).to(precision)
+            y = y.to(device).to(precision)
+
+            # forward pass
+            log_dens = model(x).log_dens(y)
+
+            loss = - log_dens.mean()
+            # backward pass
+            optimizer.zero_grad()
+            loss.backward()
+            optimizer.step()
+
+        # validation
+        model.eval()
+        with torch.no_grad():
+            val_ll = []
+            for i, (x, y) in enumerate(tqdm(loader_val, desc="Validation", leave=False)):
+                x = x.to(device).to(precision)
+                y = y.to(device).to(precision)
+
+                log_dens = model(x).log_dens(y)
+                val_ll.append(log_dens.to("cpu"))
+            val_ll = torch.cat(val_ll).mean()
+            print(f"Epoch {epoch}: Validation log-likelihood: {val_ll:.4f}")
+
+    # test
+    with torch.no_grad():
+        test_ll = []
+        for i, (x, y) in enumerate(tqdm(loader_test, desc="Test", leave=False)):
+            x = x.to(device).to(precision)
+            y = y.to(device).to(precision)
+
+            log_dens = model(x).log_dens(y)
+            test_ll.append(log_dens.to("cpu"))
+        test_ll = torch.cat(test_ll).mean()
+        print(f"Test log-likelihood: {test_ll:.4f}")
+
+
+def args():
+    parser = argparse.ArgumentParser(description="Train a MLP model using the SDD dataset")
+    parser.add_argument(
+        "--img_id",
+        type=int,
+        default=12,
+        help="Image ID to use for the SDD dataset",
+    )
+    parser.add_argument(
+        "--num_knots",
+        type=int,
+        default=10,
+        help="Number of knots to use for the spline distribution",
+    )
+    parser.add_argument(
+        "--num_mixtures",
+        type=int,
+        default=5,
+        help="Number of mixtures to use for the spline distribution",
+    )
+    parser.add_argument(
+        "--net_size",
+        type=str,
+        choices=["small", "medium", "large"],
+        default="medium",
+        help="Size of the neural network",
+    )
+    parser.add_argument(
+        "--batch_size",
+        type=int,
+        default=64,
+        help="Batch size for training",
+    )
+    parser.add_argument(
+        "--lr",
+        type=float,
+        default=1e-3,
+        help="Learning rate for the optimizer",
+    )
+    parser.add_argument(
+        "--epochs",
+        type=int,
+        default=20,
+        help="Number of epochs to train for",
+    )
+    parser.add_argument(
+        "--seed",
+        type=int,
+        default=None,
+        help="Random seed for reproducibility",
+    )
+    parser.add_argument(
+        "--use_float64",
+        action="store_true",
+        help="Use float64 precision instead of float32",
+    )
+    parser.add_argument(
+        "--init_last_layer_positive",
+        action="store_true",
+        help="Initialize the last layer of the network to be positive",
+    )
+
+    return parser.parse_args()
+
+
+if __name__ == "__main__":
+    args = args()
+    main(args)
diff --git a/pal/wmi/gasp/gasp/torch/wmipa/numerical_symb_integrator_pa.py b/pal/wmi/gasp/gasp/torch/wmipa/numerical_symb_integrator_pa.py
@@ -45,8 +45,8 @@ def __init__(
         mode: IntegratorModes,  # whether to integrate over the weights or the function
         total_degree: int,  # total degree of the polynomial, so max over sum of exponents for each monomial.
         variable_map: dict[str, int],  # name => index
-        sum_seperately=False,
-        with_sorting=False,
+        sum_seperately=True,
+        with_sorting=True,
         batch_size=None,
         monomials_lower_precision=True,
         n_workers=7,
@@ -58,8 +58,8 @@ def __init__(
             total_degree (int): The total degree of the polynomial, so max over sum of exponents for each monomial.
                 Can be an estimate for WeightedFormulaMode.
             variable_map (dict[str, int]): A mapping from variable names to indices.
-            sum_seperately (bool, optional): Whether to sum the results separately. Defaults to False.
-            with_sorting (bool, optional): Whether to sort the results. Defaults to False.
+            sum_seperately (bool, optional): Whether to sum the results separately. Defaults to True.
+            with_sorting (bool, optional): Whether to sort the results. Defaults to True.
             batch_size (int, optional): The batch size for integration. Defaults to None.
             monomials_lower_precision (bool, optional): Whether to use lower precision for monomials. Defaults to True.
             n_workers (int, optional): The number of workers for parallel processing. Defaults to 7.
diff --git a/tests_pal/test_integration_pipeline.py b/tests_pal/test_integration_pipeline.py