-
Notifications
You must be signed in to change notification settings - Fork 75
Expand file tree
/
Copy pathskforecast_forecasting.py
More file actions
240 lines (194 loc) · 7.36 KB
/
skforecast_forecasting.py
File metadata and controls
240 lines (194 loc) · 7.36 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
"""Experiment adapter for skforecast backtesting experiments."""
# copyright: hyperactive developers, MIT License (see LICENSE file)
import copy
from hyperactive.base import BaseExperiment
class SkforecastExperiment(BaseExperiment):
    """Experiment adapter for skforecast backtesting experiments.

    This class is used to perform backtesting experiments using a given
    skforecast forecaster. It allows for hyperparameter tuning and evaluation of
    the model's performance.

    Parameters
    ----------
    forecaster : skforecast forecaster
        skforecast forecaster to benchmark.
    y : pandas Series
        Target time series used in the evaluation experiment.
    steps : int
        Number of steps to predict.
    metric : str or callable
        Metric used to quantify the goodness of fit of the model.
        If string, it must be a metric name allowed by skforecast
        (e.g., 'mean_squared_error').
        If callable, it must take (y_true, y_pred) and return a float.
    initial_train_size : int
        Number of samples in the initial training set.
    exog : pandas Series or DataFrame, default=None
        Exogenous variable/s used in the evaluation experiment.
    refit : bool, default=False
        Whether to re-fit the forecaster in each iteration.
    fixed_train_size : bool, default=False
        If True, the train size doesn't increase but moves by `steps` in each iteration.
    gap : int, default=0
        Number of samples to exclude from the end of each training set and the
        start of the test set.
    allow_incomplete_fold : bool, default=True
        If True, the last fold is allowed to have fewer samples than `steps`.
    return_best : bool, default=False
        If True, the best model is returned.
    n_jobs : int or 'auto', default="auto"
        Number of jobs to run in parallel.
    verbose : bool, default=False
        Print summary figures.
    show_progress : bool, default=False
        Whether to show a progress bar.
    higher_is_better : bool, default=False
        Whether higher metric values indicate better performance.
        Set to False (default) for error metrics like MSE, MAE, MAPE where
        lower values are better. Set to True for metrics like R2 where
        higher values indicate better model performance.
    """

    _tags = {
        "authors": ["Omswastik-11", "JoaquinAmatRodrigo"],
        "maintainers": ["Omswastik-11", "fkiraly", "JoaquinAmatRodrigo", "SimonBlanke"],
        "python_dependencies": "skforecast",
    }

    def __init__(
        self,
        forecaster,
        y,
        steps,
        metric,
        initial_train_size,
        exog=None,
        refit=False,
        fixed_train_size=False,
        gap=0,
        allow_incomplete_fold=True,
        return_best=False,
        n_jobs="auto",
        verbose=False,
        show_progress=False,
        higher_is_better=False,
    ):
        self.forecaster = forecaster
        self.y = y
        self.steps = steps
        self.metric = metric
        self.initial_train_size = initial_train_size
        self.exog = exog
        self.refit = refit
        self.fixed_train_size = fixed_train_size
        self.gap = gap
        self.allow_incomplete_fold = allow_incomplete_fold
        self.return_best = return_best
        self.n_jobs = n_jobs
        self.verbose = verbose
        self.show_progress = show_progress
        self.higher_is_better = higher_is_better

        super().__init__()

        # Set the optimization direction based on higher_is_better parameter,
        # so the optimizer knows whether to minimize or maximize the score.
        higher_or_lower = "higher" if higher_is_better else "lower"
        self.set_tags(**{"property:higher_or_lower_is_better": higher_or_lower})

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the estimator.

        Parameters
        ----------
        parameter_set : str, default="default"
            Name of the parameter set to return.

        Returns
        -------
        params : dict or list of dict, default = {}
            Parameters to create testing instances of the class
            Each dict are parameters to construct an "interesting" test instance,
            i.e., MyClass(**params) or MyClass(**params[i]) creates a valid test
            instance.
            create_test_instance uses the first (or only) dictionary in `params`
        """
        from skbase.utils.dependencies import _check_soft_dependencies

        # No test parameters can be built if skforecast is not installed.
        if not _check_soft_dependencies("skforecast", severity="none"):
            return []

        import numpy as np
        import pandas as pd
        from skforecast.recursive import ForecasterRecursive
        from sklearn.ensemble import RandomForestRegressor

        forecaster = ForecasterRecursive(
            regressor=RandomForestRegressor(random_state=123),
            lags=2,
        )
        y = pd.Series(
            np.random.randn(20),
            index=pd.date_range(start="2020-01-01", periods=20, freq="D"),
            name="y",
        )
        params = {
            "forecaster": forecaster,
            "y": y,
            "steps": 3,
            "metric": "mean_squared_error",
            "initial_train_size": 10,
        }
        return [params]

    @classmethod
    def _get_score_params(cls):
        """Return settings for testing score/evaluate functions. Used in tests only.

        Returns a list, the i-th element should be valid arguments for
        self.evaluate and self.score, of an instance constructed with
        self.get_test_params()[i].

        Returns
        -------
        list of dict
            The parameters to be used for scoring.
        """
        return [{"n_estimators": 5}]

    def _paramnames(self):
        """Return the parameter names of the search.

        Returns
        -------
        list of str
            The parameter names of the search parameters.
        """
        return list(self.forecaster.get_params().keys())

    def _evaluate(self, params):
        """Evaluate the parameters.

        Parameters
        ----------
        params : dict with string keys
            Parameters to evaluate.

        Returns
        -------
        float
            The value of the parameters as per evaluation.
        dict
            Additional metadata about the search.
        """
        from skforecast.model_selection import TimeSeriesFold, backtesting_forecaster

        # Deep-copy so repeated evaluations never mutate the user's forecaster.
        forecaster = copy.deepcopy(self.forecaster)
        forecaster.set_params(params)

        cv = TimeSeriesFold(
            steps=self.steps,
            initial_train_size=self.initial_train_size,
            refit=self.refit,
            fixed_train_size=self.fixed_train_size,
            gap=self.gap,
            allow_incomplete_fold=self.allow_incomplete_fold,
        )
        results, _ = backtesting_forecaster(
            forecaster=forecaster,
            y=self.y,
            cv=cv,
            metric=self.metric,
            exog=self.exog,
            n_jobs=self.n_jobs,
            verbose=self.verbose,
            show_progress=self.show_progress,
        )

        # skforecast names the metric column after the metric string, or after
        # the callable's __name__; "score" is a last-resort fallback.
        if isinstance(self.metric, str):
            metric_name = self.metric
        else:
            metric_name = (
                self.metric.__name__ if hasattr(self.metric, "__name__") else "score"
            )

        # backtesting_forecaster returns a DataFrame
        res_float = results[metric_name].iloc[0]
        return res_float, {"results": results}