Model Ensembling¶

In [1]:
import warnings
warnings.filterwarnings('ignore')

Load Data¶

In [2]:
from sktime.datasets import load_longley
# load_longley() returns a pair; the first element is discarded and the second
# is kept as the multivariate series `y` (16 yearly rows x 5 columns, see the
# preview below).
_, y = load_longley()
y.head()
Out[2]:
GNPDEFL GNP UNEMP ARMED POP
Period
1947 83.0 234289.0 2356.0 1590.0 107608.0
1948 88.5 259426.0 2325.0 1456.0 108632.0
1949 88.2 258054.0 3682.0 1616.0 109773.0
1950 89.5 284599.0 3351.0 1650.0 110929.0
1951 96.2 328975.0 2099.0 3099.0 112075.0
In [3]:
from sktime.forecasting.model_selection import temporal_train_test_split
# Chronological split: the final 4 yearly observations are held out as the
# test set, everything before them is used for training.
y_train, y_test = temporal_train_test_split(
    y,
    test_size=4,
)

Build Ensemble Model¶

In [19]:
from sktime.registry import all_estimators
# Look up which registered estimators carry the "scitype:y" tag value
# "multivariate", i.e. the candidates that can be fitted on a multi-column y.
all_estimators(
    filter_tags={"scitype:y": ["multivariate"]},
    as_dataframe=True,
)
Out[19]:
name estimator
0 DynamicFactor <class 'sktime.forecasting.dynamic_factor.Dyna...
1 VAR <class 'sktime.forecasting.var.VAR'>
2 VARMAX <class 'sktime.forecasting.varmax.VARMAX'>
3 VECM <class 'sktime.forecasting.vecm.VECM'>
In [21]:
from sktime.forecasting.compose import EnsembleForecaster
from sktime.forecasting.ets import AutoETS
from sktime.forecasting.arima import AutoARIMA
from sktime.forecasting.var import VAR
from sktime.forecasting.varmax import VARMAX

# Combine two of the multivariate forecasters listed above into one ensemble.
# No aggregation function or weights are passed, so EnsembleForecaster's
# defaults apply (NOTE(review): presumably a plain mean -- confirm against the
# installed sktime version).
named_forecasters = [
    ('VARMAX', VARMAX()),
    ('VAR', VAR()),
]
forecaster = EnsembleForecaster(named_forecasters)

Train Ensemble Model¶

In [26]:
import numpy as np

# Relative forecasting horizon: steps 1..len(y_test) after the end of y_train.
# Deriving it from the hold-out set keeps it in sync with the `test_size`
# chosen in the split cell instead of repeating that number here.
fh = np.arange(1, len(y_test) + 1)
forecaster.fit(y=y_train, fh=fh)

# One forecast row per horizon step, covering the same periods as y_test.
y_pred = forecaster.predict(fh)
In [27]:
y_test
Out[27]:
GNPDEFL GNP UNEMP ARMED POP
Period
1959 112.6 482704.0 3813.0 2552.0 123366.0
1960 114.2 502601.0 3931.0 2514.0 125368.0
1961 115.7 518173.0 4806.0 2572.0 127852.0
1962 116.9 554894.0 4007.0 2827.0 130081.0
In [29]:
# Align the forecast columns with y_test by *name* rather than by position.
# The ensemble returns the columns in a different order than y_test, and the
# per-column metric below is only meaningful when both frames line up.
# Selecting by label is idempotent (re-running this cell is a no-op) and raises
# a KeyError if a series is missing instead of silently mis-pairing columns.
y_pred = y_pred[list(y_test.columns)]
In [30]:
y_pred
Out[30]:
GNPDEFL GNP UNEMP ARMED POP
Period
1959 107.626063 442613.144569 4937.895644 2324.279835 121918.544367
1960 110.693928 473809.042146 3539.504608 2549.692903 122290.429056
1961 114.258154 487410.106616 3457.663985 2922.335983 123258.406143
1962 113.347215 478001.581723 4634.740799 2772.840933 123961.355146

Evaluation¶

In [31]:
from sktime.performance_metrics.forecasting import mean_absolute_percentage_error
# Non-symmetric MAPE per series: 'raw_values' yields one score for each column
# instead of a single average, in the column order shared by y_test and y_pred.
mean_absolute_percentage_error(
    y_test,
    y_pred,
    symmetric=False,
    multioutput='raw_values',
)
Out[31]:
array([0.02943207, 0.08457   , 0.20795535, 0.06469975, 0.0298138 ])

Visualization¶

In [33]:
import matplotlib.pyplot as plt

def get_plots(y_train, y_test, y_pred):
    """Draw one figure per series comparing training data, actuals and forecasts.

    Parameters
    ----------
    y_train, y_test, y_pred : pandas.DataFrame
        Frames whose index provides ``.to_timestamp()`` (e.g. a PeriodIndex).
        A figure is created for every column of ``y_train``; ``y_test`` and
        ``y_pred`` are looked up by the same column name.

    Returns
    -------
    None
        The figures are created as a side effect and rendered by the notebook.
    """
    # The x-values are the same for every column, so convert the indexes once.
    train_x = y_train.index.to_timestamp()
    test_x = y_test.index.to_timestamp()
    pred_x = y_pred.index.to_timestamp()

    for column in y_train.columns:
        fig, ax = plt.subplots(figsize=(8, 6))
        # Label each line at plot time so the legend cannot drift out of sync
        # with the series it describes.
        ax.plot(train_x, y_train[column], 'bo-', label='y_train')
        ax.plot(test_x, y_test[column], 'go-', label='y_test')
        ax.plot(pred_x, y_pred[column], 'yo-', label='y_pred')
        ax.set_title(f'{column}: actual vs. forecast')
        ax.set_xlabel(y_train.index.name or 'time')
        ax.set_ylabel(column)
        ax.legend()
    
# visualization
get_plots(y_train, y_test, y_pred)

Reference¶

  • Forecasting Tutorial