import warnings
warnings.filterwarnings('ignore')
from sktime.datasets import load_longley
# Load the Longley dataset. The first return value is discarded; `y` is the
# multivariate table previewed below.
_, y = load_longley() # shape: 16 rows x 5 columns
y.head()
Period | GNPDEFL | GNP | UNEMP | ARMED | POP |
---|---|---|---|---|---|
1947 | 83.0 | 234289.0 | 2356.0 | 1590.0 | 107608.0 |
1948 | 88.5 | 259426.0 | 2325.0 | 1456.0 | 108632.0 |
1949 | 88.2 | 258054.0 | 3682.0 | 1616.0 | 109773.0 |
1950 | 89.5 | 284599.0 | 3351.0 | 1650.0 | 110929.0 |
1951 | 96.2 | 328975.0 | 2099.0 | 3099.0 | 112075.0 |
from sktime.forecasting.model_selection import temporal_train_test_split
# Chronological split: the final 4 periods become the test set.
y_train, y_test = temporal_train_test_split(y, test_size=4) # hold out last 4 years
from sktime.registry import all_estimators
# List, as a DataFrame, the registered estimators that match the
# multivariate "scitype:y" tag filter.
all_estimators(filter_tags={"scitype:y": ["multivariate"]}, as_dataframe=True)
name | estimator | |
---|---|---|
0 | DynamicFactor | <class 'sktime.forecasting.dynamic_factor.Dyna... |
1 | VAR | <class 'sktime.forecasting.var.VAR'> |
2 | VARMAX | <class 'sktime.forecasting.varmax.VARMAX'> |
3 | VECM | <class 'sktime.forecasting.vecm.VECM'> |
from sktime.forecasting.compose import EnsembleForecaster
from sktime.forecasting.ets import AutoETS
from sktime.forecasting.arima import AutoARIMA
from sktime.forecasting.var import VAR
from sktime.forecasting.varmax import VARMAX
# Ensemble of a VARMAX and a VAR forecaster, both with default parameters.
forecaster = EnsembleForecaster(
    [
        ("VARMAX", VARMAX()),
        ("VAR", VAR()),
    ]
)
import numpy as np

# Relative forecasting horizon 1..len(y_test). Deriving it from the hold-out
# set (instead of hard-coding 1..4) keeps it in sync with the `test_size`
# used for the train/test split.
fh = np.arange(1, len(y_test) + 1)
forecaster.fit(y=y_train, fh=fh)
y_pred = forecaster.predict(fh)
y_test
Period | GNPDEFL | GNP | UNEMP | ARMED | POP |
---|---|---|---|---|---|
1959 | 112.6 | 482704.0 | 3813.0 | 2552.0 | 123366.0 |
1960 | 114.2 | 502601.0 | 3931.0 | 2514.0 | 125368.0 |
1961 | 115.7 | 518173.0 | 4806.0 | 2572.0 | 127852.0 |
1962 | 116.9 | 554894.0 | 4007.0 | 2827.0 | 130081.0 |
# Re-order the prediction columns to match y_test.
# Selecting by label (rather than by a hard-coded positional permutation)
# stays correct whatever column order the forecaster returns, is safe to
# re-run, and raises KeyError if an expected column is missing.
y_pred = y_pred[y_test.columns]
y_pred
Period | GNPDEFL | GNP | UNEMP | ARMED | POP |
---|---|---|---|---|---|
1959 | 107.626063 | 442613.144569 | 4937.895644 | 2324.279835 | 121918.544367 |
1960 | 110.693928 | 473809.042146 | 3539.504608 | 2549.692903 | 122290.429056 |
1961 | 114.258154 | 487410.106616 | 3457.663985 | 2922.335983 | 123258.406143 |
1962 | 113.347215 | 478001.581723 | 4634.740799 | 2772.840933 | 123961.355146 |
from sktime.performance_metrics.forecasting import mean_absolute_percentage_error
# Non-symmetric MAPE on the hold-out set; 'raw_values' yields one error per
# column instead of a single aggregated number.
mean_absolute_percentage_error(
    y_test,
    y_pred,
    symmetric=False,
    multioutput='raw_values',
)
array([0.02943207, 0.08457 , 0.20795535, 0.06469975, 0.0298138 ])
import matplotlib.pyplot as plt
def get_plots(y_train, y_test, y_pred):
    """Plot training, test and predicted values, one figure per column.

    For every column of ``y_train`` a new 8x6 figure is created that shows
    the training series (blue), the held-out series (green) and the
    forecast (yellow) against time. The column name is used as the y-axis
    label.

    Parameters
    ----------
    y_train, y_test, y_pred
        Tables whose index provides ``to_timestamp()``. ``y_test`` and
        ``y_pred`` are looked up with each column name of ``y_train``.

    Returns
    -------
    None
        The figures are created through ``plt`` as a side effect.
    """
    # The x-axis values do not depend on the column being drawn, so convert
    # each index once instead of once per column.
    train_times = y_train.index.to_timestamp()
    test_times = y_test.index.to_timestamp()
    pred_times = y_pred.index.to_timestamp()

    for column in y_train.columns:
        _, ax = plt.subplots(figsize=(8, 6))
        line1, = ax.plot(train_times, y_train[column], 'bo-')
        line2, = ax.plot(test_times, y_test[column], 'go-')
        line3, = ax.plot(pred_times, y_pred[column], 'yo-')
        ax.legend((line1, line2, line3), ('y', 'y_test', 'y_pred'))
        ax.set_ylabel(column)
# Visualization: one figure per series, comparing the training data, the
# held-out data and the forecast.
get_plots(y_train, y_test, y_pred)