Transformer Selection
¶

In [19]:
import warnings
# Install a blanket 'ignore' filter so library warnings do not clutter the
# cell outputs below. NOTE(review): this also hides deprecation warnings.
warnings.filterwarnings('ignore')

Load Data¶

In [20]:
from sktime.datasets import load_longley
# load_longley() returns a pair; the first element is discarded and the second
# is kept as the multivariate series `y`.
_, y = load_longley() # 16 rows x 5 columns
y.head()  # preview the first rows
Out[20]:
GNPDEFL GNP UNEMP ARMED POP
Period
1947 83.0 234289.0 2356.0 1590.0 107608.0
1948 88.5 259426.0 2325.0 1456.0 108632.0
1949 88.2 258054.0 3682.0 1616.0 109773.0
1950 89.5 284599.0 3351.0 1650.0 110929.0
1951 96.2 328975.0 2099.0 3099.0 112075.0
In [21]:
from sktime.forecasting.model_selection import temporal_train_test_split
# Chronological split: the final 4 observations (one per year) become the
# test set; everything before them is used for training.
y_train, y_test = temporal_train_test_split(y, test_size=4) # hold out last 4 years

Create Pipeline¶

In [22]:
from sklearn.preprocessing import StandardScaler

from sktime.forecasting.compose import TransformedTargetForecaster
from sktime.forecasting.model_selection import ForecastingGridSearchCV, SlidingWindowSplitter

from sktime.transformations.compose import OptionalPassthrough
from sktime.transformations.series.adapt import TabularToSeriesAdaptor
from sktime.transformations.series.detrend import Deseasonalizer

from sktime.forecasting.var import VAR
In [23]:
# Wrapping a transformer in OptionalPassthrough exposes a `passthrough` flag
# that can be tuned like any other hyperparameter:
#   passthrough=False -> the wrapped transformer is applied
#   passthrough=True  -> the wrapped transformer is skipped
pipe = TransformedTargetForecaster(
    steps=[
        (
            "deseasonalizer",
            OptionalPassthrough(transformer=Deseasonalizer()),
        ),
        (
            "scaler",
            OptionalPassthrough(
                transformer=TabularToSeriesAdaptor(transformer=StandardScaler())
            ),
        ),
        ("forecaster", VAR()),
    ]
)

# List every tunable parameter name, including the nested
# `<step>__passthrough` switches used in the grid search.
pipe.get_params()
Out[23]:
{'steps': [('deseasonalizer',
   OptionalPassthrough(transformer=Deseasonalizer())),
  ('scaler',
   OptionalPassthrough(transformer=TabularToSeriesAdaptor(transformer=StandardScaler()))),
  ('forecaster', VAR())],
 'deseasonalizer': OptionalPassthrough(transformer=Deseasonalizer()),
 'scaler': OptionalPassthrough(transformer=TabularToSeriesAdaptor(transformer=StandardScaler())),
 'forecaster': VAR(),
 'deseasonalizer__passthrough': False,
 'deseasonalizer__transformer__model': 'additive',
 'deseasonalizer__transformer__sp': 1,
 'deseasonalizer__transformer': Deseasonalizer(),
 'scaler__passthrough': False,
 'scaler__transformer__fit_in_transform': False,
 'scaler__transformer__transformer__copy': True,
 'scaler__transformer__transformer__with_mean': True,
 'scaler__transformer__transformer__with_std': True,
 'scaler__transformer__transformer': StandardScaler(),
 'scaler__transformer': TabularToSeriesAdaptor(transformer=StandardScaler()),
 'forecaster__dates': None,
 'forecaster__freq': None,
 'forecaster__ic': None,
 'forecaster__maxlags': None,
 'forecaster__method': 'ols',
 'forecaster__missing': 'none',
 'forecaster__random_state': None,
 'forecaster__trend': 'c',
 'forecaster__verbose': False}

Select Transformer¶

In [24]:
# Sliding-window cross-validation with a step of 10 between windows.
cv = SlidingWindowSplitter(step_length=10)

# Candidate settings: each transformer can be bypassed through its
# `passthrough` flag, and the scaler's `with_mean` option is toggled as well.
param_grid = {
    "deseasonalizer__passthrough": [True, False],
    "scaler__transformer__transformer__with_mean": [True, False],
    "scaler__passthrough": [True, False],
}

gscv = ForecastingGridSearchCV(
    forecaster=pipe,
    param_grid=param_grid,
    cv=cv,
    n_jobs=-1,
)
selection = gscv.fit(y_train)

# Winning combination (see the output): deseasonalizer dropped, scaler kept.
selection.best_params_
Out[24]:
{'deseasonalizer__passthrough': True,
 'scaler__passthrough': False,
 'scaler__transformer__transformer__with_mean': True}
In [25]:
# Pipeline configured with the best parameter combination found by the search.
model = selection.best_forecaster_
model  # display the selected pipeline
Out[25]:
TransformedTargetForecaster(steps=[('deseasonalizer',
                                    OptionalPassthrough(passthrough=True,
                                                        transformer=Deseasonalizer())),
                                   ('scaler',
                                    OptionalPassthrough(transformer=TabularToSeriesAdaptor(transformer=StandardScaler()))),
                                   ('forecaster', VAR())])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
TransformedTargetForecaster(steps=[('deseasonalizer',
                                    OptionalPassthrough(passthrough=True,
                                                        transformer=Deseasonalizer())),
                                   ('scaler',
                                    OptionalPassthrough(transformer=TabularToSeriesAdaptor(transformer=StandardScaler()))),
                                   ('forecaster', VAR())])

Forecasting¶

In [26]:
import numpy as np

# Relative forecasting horizon 1..len(y_test): one step per held-out
# observation. Deriving it from y_test (instead of hard-coding 5) keeps the
# predictions aligned with the test set if the split size is ever changed.
fh = np.arange(1, len(y_test) + 1)
y_pred = model.predict(fh)

Evaluation¶

In [27]:
from sktime.performance_metrics.forecasting import mean_absolute_percentage_error

# Non-symmetric MAPE on the hold-out set; 'raw_values' yields one score per
# column instead of a single aggregated number.
mean_absolute_percentage_error(
    y_test,
    y_pred,
    symmetric=False,
    multioutput="raw_values",
)
Out[27]:
array([0.02865278, 0.03085247, 0.22782484, 0.15709783, 0.00571189])

Visualization¶

In [28]:
import matplotlib.pyplot as plt

def get_plots(y_train, y_test, y_pred):
    """Draw one figure per column comparing training, test and predicted values.

    Parameters
    ----------
    y_train, y_test, y_pred : pandas.DataFrame
        Training data, held-out data and forecasts. Every column of
        ``y_train`` is also looked up in ``y_test`` and ``y_pred``.

    Returns
    -------
    None
        The figures are created as a side effect.
    """

    def _x_values(index):
        # Period-based indexes are converted to timestamps, as before; any
        # other index type is plotted unchanged instead of raising.
        return index.to_timestamp() if hasattr(index, "to_timestamp") else index

    x_label = y_train.index.name or "time"

    for column in y_train.columns:
        fig, ax = plt.subplots(figsize=(8, 6))
        train_line, = ax.plot(_x_values(y_train.index), y_train[column], 'bo-')
        test_line, = ax.plot(_x_values(y_test.index), y_test[column], 'go-')
        pred_line, = ax.plot(_x_values(y_pred.index), y_pred[column], 'yo-')
        ax.legend((train_line, test_line, pred_line), ('y', 'y_test', 'y_pred'))
        # Label the figure so it can be read on its own when skimming.
        ax.set_title(f"{column}: observed vs. predicted")
        ax.set_xlabel(x_label)
        ax.set_ylabel(column)
    
# Render one train / test / forecast figure per column.
get_plots(y_train=y_train, y_test=y_test, y_pred=y_pred)

Reference¶

  • Forecasting