-
Notifications
You must be signed in to change notification settings - Fork 75
Expand file tree
/
Copy pathskforecast_forecasting.py
More file actions
240 lines (194 loc) · 7.36 KB
/
skforecast_forecasting.py
File metadata and controls
240 lines (194 loc) · 7.36 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
"""Experiment adapter for skforecast backtesting experiments."""
# copyright: hyperactive developers, MIT License (see LICENSE file)
import copy
from hyperactive.base import BaseExperiment
class SkforecastExperiment(BaseExperiment):
    """Experiment adapter for skforecast backtesting experiments.

    This class is used to perform backtesting experiments using a given
    skforecast forecaster. It allows for hyperparameter tuning and evaluation of
    the model's performance.

    Parameters
    ----------
    forecaster : skforecast forecaster
        skforecast forecaster to benchmark.
    y : pandas Series
        Target time series used in the evaluation experiment.
    steps : int
        Number of steps to predict.
    metric : str or callable
        Metric used to quantify the goodness of fit of the model.
        If string, it must be a metric name allowed by skforecast
        (e.g., 'mean_squared_error').
        If callable, it must take (y_true, y_pred) and return a float.
    initial_train_size : int
        Number of samples in the initial training set.
    exog : pandas Series or DataFrame, default=None
        Exogenous variable/s used in the evaluation experiment.
    refit : bool, default=False
        Whether to re-fit the forecaster in each iteration.
    fixed_train_size : bool, default=False
        If True, the train size doesn't increase but moves by `steps` in each iteration.
    gap : int, default=0
        Number of samples to exclude from the end of each training set and the
        start of the test set.
    allow_incomplete_fold : bool, default=True
        If True, the last fold is allowed to have fewer samples than `steps`.
    return_best : bool, default=False
        If True, the best model is returned.
    n_jobs : int or 'auto', default="auto"
        Number of jobs to run in parallel.
    verbose : bool, default=False
        Print summary figures.
    show_progress : bool, default=False
        Whether to show a progress bar.
    higher_is_better : bool, default=False
        Whether higher metric values indicate better performance.
        Set to False (default) for error metrics like MSE, MAE, MAPE where
        lower values are better. Set to True for metrics like R2 where
        higher values indicate better model performance.
    """

    _tags = {
        "authors": ["Omswastik-11", "JoaquinAmatRodrigo"],
        "maintainers": ["Omswastik-11", "fkiraly", "JoaquinAmatRodrigo", "SimonBlanke"],
        "python_dependencies": "skforecast",
    }

    def __init__(
        self,
        forecaster,
        y,
        steps,
        metric,
        initial_train_size,
        exog=None,
        refit=False,
        fixed_train_size=False,
        gap=0,
        allow_incomplete_fold=True,
        return_best=False,
        n_jobs="auto",
        verbose=False,
        show_progress=False,
        higher_is_better=False,
    ):
        self.forecaster = forecaster
        self.y = y
        self.steps = steps
        self.metric = metric
        self.initial_train_size = initial_train_size
        self.exog = exog
        self.refit = refit
        self.fixed_train_size = fixed_train_size
        self.gap = gap
        self.allow_incomplete_fold = allow_incomplete_fold
        self.return_best = return_best
        self.n_jobs = n_jobs
        self.verbose = verbose
        self.show_progress = show_progress
        self.higher_is_better = higher_is_better

        super().__init__()

        # Set the optimization direction based on higher_is_better parameter,
        # so the optimizer knows whether to minimize or maximize the score.
        higher_or_lower = "higher" if higher_is_better else "lower"
        self.set_tags(**{"property:higher_or_lower_is_better": higher_or_lower})

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the estimator.

        Parameters
        ----------
        parameter_set : str, default="default"
            Name of the parameter set to return.

        Returns
        -------
        params : dict or list of dict, default = {}
            Parameters to create testing instances of the class
            Each dict are parameters to construct an "interesting" test instance,
            i.e., MyClass(**params) or MyClass(**params[i]) creates a valid test
            instance.
            create_test_instance uses the first (or only) dictionary in `params`
        """
        from skbase.utils.dependencies import _check_soft_dependencies

        # No test parameters can be built if skforecast is not installed.
        if not _check_soft_dependencies("skforecast", severity="none"):
            return []

        import numpy as np
        import pandas as pd
        from skforecast.recursive import ForecasterRecursive
        from sklearn.ensemble import RandomForestRegressor

        forecaster = ForecasterRecursive(
            regressor=RandomForestRegressor(random_state=123),
            lags=2,
        )
        y = pd.Series(
            np.random.randn(20),
            index=pd.date_range(start="2020-01-01", periods=20, freq="D"),
            name="y",
        )
        params = {
            "forecaster": forecaster,
            "y": y,
            "steps": 3,
            "metric": "mean_squared_error",
            "initial_train_size": 10,
        }
        return [params]

    @classmethod
    def _get_score_params(cls):
        """Return settings for testing score/evaluate functions. Used in tests only.

        Returns a list, the i-th element should be valid arguments for
        self.evaluate and self.score, of an instance constructed with
        self.get_test_params()[i].

        Returns
        -------
        list of dict
            The parameters to be used for scoring.
        """
        return [{"n_estimators": 5}]

    def _paramnames(self):
        """Return the parameter names of the search.

        Returns
        -------
        list of str
            The parameter names of the search parameters.
        """
        return list(self.forecaster.get_params().keys())

    def _evaluate(self, params):
        """Evaluate the parameters.

        Parameters
        ----------
        params : dict with string keys
            Parameters to evaluate.

        Returns
        -------
        float
            The value of the parameters as per evaluation.
        dict
            Additional metadata about the search.
        """
        from skforecast.model_selection import TimeSeriesFold, backtesting_forecaster

        # Deep-copy so repeated evaluations never mutate the user's forecaster.
        forecaster = copy.deepcopy(self.forecaster)
        forecaster.set_params(params)

        cv = TimeSeriesFold(
            steps=self.steps,
            initial_train_size=self.initial_train_size,
            refit=self.refit,
            fixed_train_size=self.fixed_train_size,
            gap=self.gap,
            allow_incomplete_fold=self.allow_incomplete_fold,
        )
        results, _ = backtesting_forecaster(
            forecaster=forecaster,
            y=self.y,
            cv=cv,
            metric=self.metric,
            exog=self.exog,
            n_jobs=self.n_jobs,
            verbose=self.verbose,
            show_progress=self.show_progress,
        )

        # skforecast names the metric column after the metric string, or after
        # the callable's __name__; "score" is a last-resort fallback.
        if isinstance(self.metric, str):
            metric_name = self.metric
        else:
            metric_name = (
                self.metric.__name__ if hasattr(self.metric, "__name__") else "score"
            )

        # backtesting_forecaster returns a DataFrame
        res_float = results[metric_name].iloc[0]
        return res_float, {"results": results}