Skip to content

Commit 6b1da44

Browse files
authored
feature: forecaster hyperparams and eval metrics (OpenSTEF#746)
* feature(OpenSTEF#729) Removed to_state and from_state methods in favor of builtin python state saving functions. Signed-off-by: Egor Dmitriev <egor.dmitriev@alliander.com> * feature(OpenSTEF#729): Fixed issue where generic transform pipeline could not be serialized. Signed-off-by: Egor Dmitriev <egor.dmitriev@alliander.com> * feature(OpenSTEF#729): Added more state saving tests Signed-off-by: Egor Dmitriev <egor.dmitriev@alliander.com> * feature(OpenSTEF#729): Added more state saving tests Signed-off-by: Egor Dmitriev <egor.dmitriev@alliander.com> * feature(OpenSTEF#729): Added more state saving tests Signed-off-by: Egor Dmitriev <egor.dmitriev@alliander.com> * feature: standardized objective function. Added custom evaluation functions for forecasters. * fix: Formatting. Signed-off-by: Egor Dmitriev <egor.dmitriev@alliander.com> --------- Signed-off-by: Egor Dmitriev <egor.dmitriev@alliander.com>
1 parent dea41c8 commit 6b1da44

7 files changed

Lines changed: 198 additions & 28 deletions

File tree

packages/openstef-beam/src/openstef_beam/metrics/metrics_probabilistic.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import numpy.typing as npt
2020

2121
from openstef_core.exceptions import MissingExtraError
22+
from openstef_core.types import Quantile
2223

2324

2425
def crps(
@@ -214,3 +215,58 @@ def mean_absolute_calibration_error(
214215
"""
215216
observed_probs = np.array([observed_probability(y_true, y_pred[:, i]) for i in range(len(quantiles))])
216217
return float(np.mean(np.abs(observed_probs - quantiles)))
218+
219+
220+
def mean_pinball_loss(
221+
y_true: npt.NDArray[np.floating],
222+
y_pred: npt.NDArray[np.floating],
223+
quantiles: list[Quantile],
224+
sample_weight: npt.NDArray[np.floating] | None = None,
225+
) -> float:
226+
"""Calculate the Mean Pinball Loss for quantile forecasts.
227+
228+
The Pinball Loss is a proper scoring rule for evaluating quantile forecasts.
229+
It penalizes under- and over-predictions differently based on the quantile level.
230+
231+
Args:
232+
y_true: Observed values with shape (num_samples,) or (num_samples, num_quantiles).
233+
y_pred: Predicted quantiles with shape (num_samples, num_quantiles).
234+
Each column corresponds to predictions for a specific quantile level.
235+
quantiles: Quantile levels with shape (num_quantiles,).
236+
Must be sorted in ascending order and contain values in [0, 1].
237+
sample_weight: Optional weights for each sample with shape (num_samples,).
238+
239+
Returns:
240+
The weighted average Pinball Loss across all samples and quantiles. Lower values indicate better
241+
forecast quality.
242+
"""
243+
# Resize the predictions and targets.
244+
y_pred = np.reshape(y_pred, [-1, len(quantiles)])
245+
n_rows = y_pred.shape[0]
246+
y_true = np.reshape(y_true, [n_rows, -1])
247+
sample_weight = np.reshape(sample_weight, [n_rows, 1]) if sample_weight is not None else None
248+
249+
# Extract quantile values into array for vectorized operations
250+
quantile_values = np.array(quantiles) # shape: (n_quantiles,)
251+
252+
# Compute errors for all quantiles at once
253+
errors = y_true - y_pred # shape: (num_samples, num_quantiles)
254+
255+
# Compute masks for all quantiles simultaneously
256+
underpredict_mask = errors >= 0 # y_true >= y_pred, shape: (num_samples, num_quantiles)
257+
overpredict_mask = errors < 0 # y_true < y_pred, shape: (num_samples, num_quantiles)
258+
259+
# Vectorized pinball loss computation using broadcasting
260+
# quantiles broadcasts from (num_quantiles,) to (num_samples, num_quantiles)
261+
loss = quantiles * underpredict_mask * errors - (1 - quantile_values) * overpredict_mask * errors
262+
263+
# Apply sample weights if provided
264+
if sample_weight is not None:
265+
sample_weight = np.asarray(sample_weight).reshape(-1, 1) # shape: (num_samples, 1)
266+
loss *= sample_weight
267+
total_weight = sample_weight.sum() * len(quantiles)
268+
else:
269+
total_weight = loss.size
270+
271+
# Return mean loss across all samples and quantiles
272+
return float(loss.sum() / total_weight)

packages/openstef-beam/tests/unit/metrics/test_metrics_probabilistic.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,11 @@
66

77
import numpy as np
88
import pytest
9+
from sklearn.metrics import mean_pinball_loss as sk_mean_pinball_loss
910

1011
from openstef_beam.metrics import crps, mean_absolute_calibration_error, rcrps
12+
from openstef_beam.metrics.metrics_probabilistic import mean_pinball_loss
13+
from openstef_core.types import Q
1114

1215

1316
# CRPS Test Cases
@@ -151,3 +154,33 @@ def test_mean_absolute_calibration_error() -> None:
151154

152155
assert isinstance(result, float)
153156
assert result == (0.4 + 0.4) / 3 # observed probabilities are 0.5, 0.5, 0.5 vs 0.1, 0.5, 0.9 quantiles
157+
158+
159+
def test_mean_pinball_loss_matches_sklearn_average_when_multi_quantile():
    # Arrange: noisy targets and predictions with a distinct bias per quantile, shape (n, q).
    rng = np.random.default_rng(seed=42)
    n_samples = 40
    y_true = rng.normal(loc=1.0, scale=2.0, size=n_samples)
    quantiles = [Q(0.1), Q(0.5), Q(0.9)]
    noise_scales = (0.7, 0.5, 0.7)  # per-quantile noise spread
    offsets = (-0.4, 0.0, 0.4)  # per-quantile bias: low quantile below, high quantile above
    y_pred = np.stack(
        [
            y_true + rng.normal(0, scale, size=n_samples) + offset
            for scale, offset in zip(noise_scales, offsets)
        ],
        axis=1,
    )

    # Act
    actual = mean_pinball_loss(y_true=y_true, y_pred=y_pred, quantiles=quantiles)
    per_quantile_losses = [
        sk_mean_pinball_loss(y_true, y_pred[:, i], alpha=float(quantile))
        for i, quantile in enumerate(quantiles)
    ]
    expected = np.mean(np.array(per_quantile_losses, dtype=float))

    # Assert: the multi-quantile mean must equal the average of sklearn's per-quantile losses.
    assert np.allclose(actual, expected, rtol=1e-12, atol=1e-12)

packages/openstef-core/src/openstef_core/exceptions.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,10 @@ class PredictError(Exception):
104104
"""Exception raised for errors during forecasting operations."""
105105

106106

107+
class InputValidationError(ValueError):
    """Exception raised for input validation errors.

    Subclasses ValueError so existing callers catching ValueError still work.
    Raised by forecasters when model input data fails pre-fit/pre-predict checks
    (e.g. NaN values present in the input data).
    """
107111
class ModelLoadingError(Exception):
108112
"""Exception raised when a model fails to load properly."""
109113

packages/openstef-models/src/openstef_models/models/forecasting/gblinear_forecaster.py

Lines changed: 37 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
to predict values outside the range of the training data.
1212
"""
1313

14-
from functools import partial
1514
from typing import Literal, override
1615

1716
import numpy as np
@@ -22,11 +21,16 @@
2221

2322
from openstef_core.datasets.mixins import LeadTime
2423
from openstef_core.datasets.validated_datasets import ForecastDataset, ForecastInputDataset
25-
from openstef_core.exceptions import MissingExtraError, NotFittedError
24+
from openstef_core.exceptions import InputValidationError, MissingExtraError, NotFittedError
2625
from openstef_core.mixins.predictor import HyperParams
2726
from openstef_models.explainability.mixins import ExplainableForecaster
2827
from openstef_models.models.forecasting.forecaster import Forecaster, ForecasterConfig
29-
from openstef_models.utils.loss_functions import OBJECTIVE_MAP, ObjectiveFunctionType, xgb_prepare_target_for_objective
28+
from openstef_models.utils.evaluation_functions import EvaluationFunctionType, get_evaluation_function
29+
from openstef_models.utils.loss_functions import (
30+
ObjectiveFunctionType,
31+
get_objective_function,
32+
xgb_prepare_target_for_objective,
33+
)
3034

3135
try:
3236
import xgboost as xgb
@@ -52,16 +56,22 @@ class GBLinearHyperParams(HyperParams):
5256
"rounds.",
5357
)
5458
objective: ObjectiveFunctionType | Literal["reg:quantileerror"] = Field(
55-
default="pinball_loss",
56-
description="Objective function for training. 'pinball_loss' is recommended for probabilistic forecasting.",
59+
default="reg:quantileerror",
60+
description="Objective function for training. 'reg:quantileerror' is recommended "
61+
"for probabilistic forecasting.",
62+
)
63+
evaluation_metric: EvaluationFunctionType = Field(
64+
default="mean_pinball_loss",
65+
description="Metric used for evaluation during training. Defaults to 'mean_pinball_loss' "
66+
"for quantile regression.",
5767
)
5868

5969
# Regularization
6070
reg_alpha: float = Field(
6171
default=0.0001, description="L1 regularization on weights. Higher values increase regularization. Range: [0,∞]"
6272
)
6373
reg_lambda: float = Field(
64-
default=0.0, description="L2 regularization on weights. Higher values increase regularization. Range: [0,∞]"
74+
default=0.1, description="L2 regularization on weights. Higher values increase regularization. Range: [0,∞]"
6575
)
6676

6777
# Feature selection
@@ -176,15 +186,9 @@ def __init__(self, config: GBLinearForecasterConfig) -> None:
176186
"""
177187
self._config = config or GBLinearForecasterConfig()
178188

179-
if self.config.hyperparams.objective == "reg:quantileerror":
180-
objective = "reg:quantileerror"
181-
else:
182-
objective = partial(OBJECTIVE_MAP[self._config.hyperparams.objective], quantiles=self._config.quantiles)
183-
184189
self._gblinear_model = xgb.XGBRegressor(
185190
booster="gblinear",
186191
# Core parameters for forecasting
187-
objective=objective,
188192
n_estimators=self._config.hyperparams.n_steps,
189193
learning_rate=self._config.hyperparams.learning_rate,
190194
early_stopping_rounds=self._config.hyperparams.early_stopping_rounds,
@@ -196,6 +200,16 @@ def __init__(self, config: GBLinearForecasterConfig) -> None:
196200
updater=self._config.hyperparams.updater,
197201
quantile_alpha=[float(q) for q in self._config.quantiles],
198202
top_k=self._config.hyperparams.top_k if self._config.hyperparams.feature_selector == "thrifty" else None,
203+
# Objective
204+
objective=get_objective_function(
205+
function_type=self._config.hyperparams.objective, quantiles=self._config.quantiles
206+
)
207+
if self._config.hyperparams.objective != "reg:quantileerror"
208+
else "reg:quantileerror",
209+
eval_metric=get_evaluation_function(
210+
function_type=self._config.hyperparams.evaluation_metric, quantiles=self._config.quantiles
211+
),
212+
disable_default_eval_metric=True,
199213
)
200214
self._target_scaler = StandardScaler()
201215

@@ -216,7 +230,6 @@ def is_fitted(self) -> bool:
216230

217231
def _prepare_fit_input(self, data: ForecastInputDataset) -> tuple[pd.DataFrame, np.ndarray, pd.Series]:
218232
input_data: pd.DataFrame = data.input_data()
219-
220233
# Scale the target variable
221234
target: np.ndarray = np.asarray(data.target_series.values)
222235
target = self._target_scaler.transform(target.reshape(-1, 1)).flatten()
@@ -234,9 +247,12 @@ def _prepare_fit_input(self, data: ForecastInputDataset) -> tuple[pd.DataFrame,
234247

235248
@override
236249
def fit(self, data: ForecastInputDataset, data_val: ForecastInputDataset | None = None) -> None:
237-
# Fit the target scaler
238-
target: np.ndarray = np.asarray(data.target_series.values)
239-
self._target_scaler.fit(target.reshape(-1, 1))
250+
# Data checks
251+
if data.data.isna().any().any():
252+
raise InputValidationError("There are nan values in the input data. Use imputation transform to fix them.")
253+
254+
# Fit the scalers
255+
self._target_scaler.fit(data.target_series.to_frame())
240256

241257
# Prepare training data
242258
input_data, target, sample_weight = self._prepare_fit_input(data)
@@ -264,11 +280,15 @@ def predict(self, data: ForecastInputDataset) -> ForecastDataset:
264280
if not self.is_fitted:
265281
raise NotFittedError(self.__class__.__name__)
266282

283+
# Data checks
284+
if data.input_data().isna().any().any():
285+
raise InputValidationError("There are nan values in the input data. Use imputation transform to fix them.")
286+
267287
# Get input features for prediction
268288
input_data: pd.DataFrame = data.input_data(start=data.forecast_start)
269289

270290
# Generate predictions
271-
predictions_array: np.ndarray = self._gblinear_model.predict(input_data)
291+
predictions_array: np.ndarray = self._gblinear_model.predict(input_data).reshape(-1, len(self.config.quantiles))
272292

273293
# Inverse transform the scaled predictions
274294
predictions_array = self._target_scaler.inverse_transform(predictions_array)

packages/openstef-models/src/openstef_models/models/forecasting/xgboost_forecaster.py

Lines changed: 23 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
comprehensive hyperparameter control for production forecasting workflows.
1010
"""
1111

12-
from functools import partial
1312
from typing import Literal, override
1413

1514
import numpy as np
@@ -22,7 +21,12 @@
2221
from openstef_core.mixins import HyperParams
2322
from openstef_models.explainability.mixins import ExplainableForecaster
2423
from openstef_models.models.forecasting.forecaster import Forecaster, ForecasterConfig
25-
from openstef_models.utils.loss_functions import OBJECTIVE_MAP, ObjectiveFunctionType, xgb_prepare_target_for_objective
24+
from openstef_models.utils.evaluation_functions import EvaluationFunctionType, get_evaluation_function
25+
from openstef_models.utils.loss_functions import (
26+
ObjectiveFunctionType,
27+
get_objective_function,
28+
xgb_prepare_target_for_objective,
29+
)
2630

2731
try:
2832
import xgboost as xgb
@@ -61,7 +65,7 @@ class XGBoostHyperParams(HyperParams):
6165

6266
# Core Tree Boosting Parameters
6367
n_estimators: int = Field(
64-
default=500,
68+
default=100,
6569
description="Number of boosting rounds/trees to fit. Higher values may improve performance but "
6670
"increase training time and risk overfitting.",
6771
)
@@ -91,6 +95,11 @@ class XGBoostHyperParams(HyperParams):
9195
default="pinball_loss",
9296
description="Objective function for training. 'pinball_loss' is recommended for probabilistic forecasting.",
9397
)
98+
evaluation_metric: EvaluationFunctionType = Field(
99+
default="mean_pinball_loss",
100+
description="Metric used for evaluation during training. Defaults to 'mean_pinball_loss' "
101+
"for quantile regression.",
102+
)
94103

95104
# Regularization
96105
reg_alpha: float = Field(
@@ -149,10 +158,10 @@ class XGBoostHyperParams(HyperParams):
149158

150159
# General Parameters
151160
random_state: int | None = Field(
152-
default=None, alias="seed", description="Random seed for reproducibility. Controls tree structure randomness."
161+
default=42, description="Random seed for reproducibility. Controls tree structure randomness."
153162
)
154163
early_stopping_rounds: int | None = Field(
155-
default=10,
164+
default=None,
156165
description="Training will stop if performance doesn't improve for this many rounds. Requires validation data.",
157166
)
158167
use_target_scaling: bool = Field(
@@ -192,7 +201,7 @@ class XGBoostForecasterConfig(ForecasterConfig):
192201
n_jobs: int = Field(
193202
default=1, description="Number of parallel threads for tree construction. -1 uses all available cores."
194203
)
195-
verbosity: Literal[0, 1, 2, 3] = Field(
204+
verbosity: Literal[0, 1, 2, 3, True] = Field(
196205
default=1, description="Verbosity level. 0=silent, 1=warning, 2=info, 3=debug"
197206
)
198207

@@ -262,8 +271,6 @@ def __init__(self, config: XGBoostForecasterConfig) -> None:
262271
"""
263272
self._config = config
264273

265-
objective = partial(OBJECTIVE_MAP[self._config.hyperparams.objective], quantiles=self._config.quantiles)
266-
267274
self._xgboost_model = xgb.XGBRegressor(
268275
# Multi-output configuration
269276
multi_strategy="one_output_per_tree",
@@ -297,7 +304,13 @@ def __init__(self, config: XGBoostForecasterConfig) -> None:
297304
# Early stopping handled in fit method
298305
early_stopping_rounds=self._config.hyperparams.early_stopping_rounds,
299306
# Objective
300-
objective=objective,
307+
objective=get_objective_function(
308+
function_type=self._config.hyperparams.objective, quantiles=self._config.quantiles
309+
),
310+
eval_metric=get_evaluation_function(
311+
function_type=self._config.hyperparams.evaluation_metric, quantiles=self._config.quantiles
312+
),
313+
disable_default_eval_metric=True,
301314
)
302315
self._target_scaler = StandardScaler() if self._config.hyperparams.use_target_scaling else None
303316

@@ -372,7 +385,7 @@ def predict(self, data: ForecastInputDataset) -> ForecastDataset:
372385
input_data: pd.DataFrame = data.input_data(start=data.forecast_start)
373386

374387
# Generate predictions
375-
predictions_array: np.ndarray = self._xgboost_model.predict(input_data)
388+
predictions_array: np.ndarray = self._xgboost_model.predict(input_data).reshape(-1, len(self.config.quantiles))
376389

377390
# Inverse transform the scaled predictions
378391
if self._target_scaler is not None:
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# SPDX-FileCopyrightText: 2025 Contributors to the OpenSTEF project <short.term.energy.forecasts@alliander.com>
2+
#
3+
# SPDX-License-Identifier: MPL-2.0
4+
"""Utility functions for evaluation metrics in forecasting models."""
5+
6+
from collections.abc import Callable
7+
from functools import partial
8+
from typing import Any, Literal
9+
10+
import numpy as np
11+
12+
from openstef_beam.metrics.metrics_probabilistic import mean_pinball_loss
13+
from openstef_core.types import Quantile
14+
15+
type EvaluationFunctionType = Literal["mean_pinball_loss"]
16+
17+
EVALUATION_MAP = {
18+
"mean_pinball_loss": mean_pinball_loss,
19+
}
20+
21+
22+
def get_evaluation_function(
23+
function_type: EvaluationFunctionType, quantiles: list[Quantile] | None = None, **kwargs: Any
24+
) -> Callable[[np.ndarray, np.ndarray], float]:
25+
eval_metric = partial(EVALUATION_MAP[function_type], quantiles=quantiles, **kwargs)
26+
eval_metric.__name__ = function_type # pyright: ignore[reportAttributeAccessIssue]
27+
return eval_metric
28+
29+
30+
__all__ = ["EVALUATION_MAP", "EvaluationFunctionType"]

0 commit comments

Comments
 (0)